]>
Commit | Line | Data |
---|---|---|
20c8ccb1 | 1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
a656c8ef | 2 | /* |
043405e1 CO |
3 | * Kernel-based Virtual Machine driver for Linux |
4 | * | |
5 | * This header defines architecture specific interfaces, x86 version | |
043405e1 CO |
6 | */ |
7 | ||
1965aae3 PA |
8 | #ifndef _ASM_X86_KVM_HOST_H |
9 | #define _ASM_X86_KVM_HOST_H | |
043405e1 | 10 | |
34c16eec ZX |
11 | #include <linux/types.h> |
12 | #include <linux/mm.h> | |
e930bffe | 13 | #include <linux/mmu_notifier.h> |
229456fc | 14 | #include <linux/tracepoint.h> |
f5f48ee1 | 15 | #include <linux/cpumask.h> |
f5132b01 | 16 | #include <linux/irq_work.h> |
447ae316 | 17 | #include <linux/irq.h> |
34c16eec ZX |
18 | |
19 | #include <linux/kvm.h> | |
20 | #include <linux/kvm_para.h> | |
edf88417 | 21 | #include <linux/kvm_types.h> |
f5132b01 | 22 | #include <linux/perf_event.h> |
d828199e MT |
23 | #include <linux/pvclock_gtod.h> |
24 | #include <linux/clocksource.h> | |
87276880 | 25 | #include <linux/irqbypass.h> |
5c919412 | 26 | #include <linux/hyperv.h> |
34c16eec | 27 | |
7d669f50 | 28 | #include <asm/apic.h> |
50d0a0f9 | 29 | #include <asm/pvclock-abi.h> |
e01a1b57 | 30 | #include <asm/desc.h> |
0bed3b56 | 31 | #include <asm/mtrr.h> |
9962d032 | 32 | #include <asm/msr-index.h> |
3ee89722 | 33 | #include <asm/asm.h> |
21ebbeda | 34 | #include <asm/kvm_page_track.h> |
95c7b77d | 35 | #include <asm/kvm_vcpu_regs.h> |
5a485803 | 36 | #include <asm/hyperv-tlfs.h> |
e01a1b57 | 37 | |
741cbbae PB |
38 | #define __KVM_HAVE_ARCH_VCPU_DEBUGFS |
39 | ||
074c82c8 | 40 | #define KVM_MAX_VCPUS 1024 |
1dbaf04c | 41 | #define KVM_SOFT_MAX_VCPUS 710 |
4ddacd52 EH |
42 | |
43 | /* | |
44 | * In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs | |
45 | * might be larger than the actual number of VCPUs because the | |
46 | * APIC ID encodes CPU topology information. | |
47 | * | |
48 | * In the worst case, we'll need less than one extra bit for the | |
49 | * Core ID, and less than one extra bit for the Package (Die) ID, | |
50 | * so a ratio of 4 should be enough. | |
51 | */ | |
52 | #define KVM_VCPU_ID_RATIO 4 | |
53 | #define KVM_MAX_VCPU_ID (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO) | |
54 | ||
0743247f AW |
55 | /* memory slots that are not exposed to userspace */ |
56 | #define KVM_PRIVATE_MEM_SLOTS 3 | |
93a5cef0 | 57 | |
b401ee0b | 58 | #define KVM_HALT_POLL_NS_DEFAULT 200000 |
69a9f69b | 59 | |
8175e5b7 AG |
60 | #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS |
61 | ||
3c9bd400 JZ |
62 | #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ |
63 | KVM_DIRTY_LOG_INITIALLY_SET) | |
64 | ||
fe6b6bc8 CQ |
65 | #define KVM_BUS_LOCK_DETECTION_VALID_MODE (KVM_BUS_LOCK_DETECTION_OFF | \ |
66 | KVM_BUS_LOCK_DETECTION_EXIT) | |
67 | ||
2860c4b1 | 68 | /* x86-specific vcpu->requests bit members */ |
2387149e AJ |
69 | #define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0) |
70 | #define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1) | |
71 | #define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2) | |
72 | #define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3) | |
73 | #define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4) | |
727a7e27 | 74 | #define KVM_REQ_LOAD_MMU_PGD KVM_ARCH_REQ(5) |
2387149e AJ |
75 | #define KVM_REQ_EVENT KVM_ARCH_REQ(6) |
76 | #define KVM_REQ_APF_HALT KVM_ARCH_REQ(7) | |
77 | #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8) | |
78 | #define KVM_REQ_NMI KVM_ARCH_REQ(9) | |
79 | #define KVM_REQ_PMU KVM_ARCH_REQ(10) | |
80 | #define KVM_REQ_PMI KVM_ARCH_REQ(11) | |
81 | #define KVM_REQ_SMI KVM_ARCH_REQ(12) | |
82 | #define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13) | |
83 | #define KVM_REQ_MCLOCK_INPROGRESS \ | |
84 | KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) | |
85 | #define KVM_REQ_SCAN_IOAPIC \ | |
86 | KVM_ARCH_REQ_FLAGS(15, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) | |
87 | #define KVM_REQ_GLOBAL_CLOCK_UPDATE KVM_ARCH_REQ(16) | |
88 | #define KVM_REQ_APIC_PAGE_RELOAD \ | |
89 | KVM_ARCH_REQ_FLAGS(17, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) | |
90 | #define KVM_REQ_HV_CRASH KVM_ARCH_REQ(18) | |
91 | #define KVM_REQ_IOAPIC_EOI_EXIT KVM_ARCH_REQ(19) | |
92 | #define KVM_REQ_HV_RESET KVM_ARCH_REQ(20) | |
93 | #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) | |
94 | #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) | |
e40ff1d6 | 95 | #define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23) |
729c15c2 | 96 | #define KVM_REQ_GET_NESTED_STATE_PAGES KVM_ARCH_REQ(24) |
8df14af4 SS |
97 | #define KVM_REQ_APICV_UPDATE \ |
98 | KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) | |
eeeb4f67 | 99 | #define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26) |
07ffaf34 | 100 | #define KVM_REQ_TLB_FLUSH_GUEST \ |
eeeb4f67 | 101 | KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP) |
557a961a | 102 | #define KVM_REQ_APF_READY KVM_ARCH_REQ(28) |
1a155254 | 103 | #define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29) |
a85863c2 MS |
104 | #define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \ |
105 | KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) | |
2860c4b1 | 106 | |
cfec82cb JR |
107 | #define CR0_RESERVED_BITS \ |
108 | (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | |
109 | | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ | |
110 | | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) | |
111 | ||
cfec82cb JR |
112 | #define CR4_RESERVED_BITS \ |
113 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | |
114 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | |
ad756a16 | 115 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ |
afcbf13f | 116 | | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \ |
fd8cb433 | 117 | | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \ |
ae3e61e1 | 118 | | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP)) |
cfec82cb JR |
119 | |
120 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) | |
121 | ||
122 | ||
cd6e8f87 | 123 | |
cd6e8f87 | 124 | #define INVALID_PAGE (~(hpa_t)0) |
dd180b3e XG |
125 | #define VALID_PAGE(x) ((x) != INVALID_PAGE) |
126 | ||
cd6e8f87 | 127 | #define UNMAPPED_GVA (~(gpa_t)0) |
c74ad08f | 128 | #define INVALID_GPA (~(gpa_t)0) |
cd6e8f87 | 129 | |
ec04b260 | 130 | /* KVM Hugepage definitions for x86 */ |
3bae0459 SC |
131 | #define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G |
132 | #define KVM_NR_PAGE_SIZES (KVM_MAX_HUGEPAGE_LEVEL - PG_LEVEL_4K + 1) | |
82855413 JR |
133 | #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9) |
134 | #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) | |
ec04b260 JR |
135 | #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) |
136 | #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) | |
137 | #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) | |
05da4558 | 138 | |
d657a98e | 139 | #define KVM_PERMILLE_MMU_PAGES 20 |
bc8a3d89 | 140 | #define KVM_MIN_ALLOC_MMU_PAGES 64UL |
114df303 | 141 | #define KVM_MMU_HASH_SHIFT 12 |
1ae0a13d | 142 | #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) |
d657a98e ZX |
143 | #define KVM_MIN_FREE_MMU_PAGES 5 |
144 | #define KVM_REFILL_PAGES 25 | |
3f4e3eb4 | 145 | #define KVM_MAX_CPUID_ENTRIES 256 |
0bed3b56 | 146 | #define KVM_NR_FIXED_MTRR_REGION 88 |
0d234daf | 147 | #define KVM_NR_VAR_MTRR 8 |
d657a98e | 148 | |
af585b92 GN |
149 | #define ASYNC_PF_PER_VCPU 64 |
150 | ||
5fdbf976 | 151 | enum kvm_reg { |
95c7b77d SC |
152 | VCPU_REGS_RAX = __VCPU_REGS_RAX, |
153 | VCPU_REGS_RCX = __VCPU_REGS_RCX, | |
154 | VCPU_REGS_RDX = __VCPU_REGS_RDX, | |
155 | VCPU_REGS_RBX = __VCPU_REGS_RBX, | |
156 | VCPU_REGS_RSP = __VCPU_REGS_RSP, | |
157 | VCPU_REGS_RBP = __VCPU_REGS_RBP, | |
158 | VCPU_REGS_RSI = __VCPU_REGS_RSI, | |
159 | VCPU_REGS_RDI = __VCPU_REGS_RDI, | |
2b3ccfa0 | 160 | #ifdef CONFIG_X86_64 |
95c7b77d SC |
161 | VCPU_REGS_R8 = __VCPU_REGS_R8, |
162 | VCPU_REGS_R9 = __VCPU_REGS_R9, | |
163 | VCPU_REGS_R10 = __VCPU_REGS_R10, | |
164 | VCPU_REGS_R11 = __VCPU_REGS_R11, | |
165 | VCPU_REGS_R12 = __VCPU_REGS_R12, | |
166 | VCPU_REGS_R13 = __VCPU_REGS_R13, | |
167 | VCPU_REGS_R14 = __VCPU_REGS_R14, | |
168 | VCPU_REGS_R15 = __VCPU_REGS_R15, | |
2b3ccfa0 | 169 | #endif |
5fdbf976 | 170 | VCPU_REGS_RIP, |
f8845541 | 171 | NR_VCPU_REGS, |
2b3ccfa0 | 172 | |
6de4f3ad | 173 | VCPU_EXREG_PDPTR = NR_VCPU_REGS, |
bd31fe49 | 174 | VCPU_EXREG_CR0, |
aff48baa | 175 | VCPU_EXREG_CR3, |
f98c1e77 | 176 | VCPU_EXREG_CR4, |
6de12732 | 177 | VCPU_EXREG_RFLAGS, |
2fb92db1 | 178 | VCPU_EXREG_SEGMENTS, |
5addc235 | 179 | VCPU_EXREG_EXIT_INFO_1, |
87915858 | 180 | VCPU_EXREG_EXIT_INFO_2, |
6de4f3ad AK |
181 | }; |
182 | ||
2b3ccfa0 | 183 | enum { |
81609e3e | 184 | VCPU_SREG_ES, |
2b3ccfa0 | 185 | VCPU_SREG_CS, |
81609e3e | 186 | VCPU_SREG_SS, |
2b3ccfa0 | 187 | VCPU_SREG_DS, |
2b3ccfa0 ZX |
188 | VCPU_SREG_FS, |
189 | VCPU_SREG_GS, | |
2b3ccfa0 ZX |
190 | VCPU_SREG_TR, |
191 | VCPU_SREG_LDTR, | |
192 | }; | |
193 | ||
1e9e2622 WL |
194 | enum exit_fastpath_completion { |
195 | EXIT_FASTPATH_NONE, | |
404d5d7b WL |
196 | EXIT_FASTPATH_REENTER_GUEST, |
197 | EXIT_FASTPATH_EXIT_HANDLED, | |
1e9e2622 | 198 | }; |
404d5d7b | 199 | typedef enum exit_fastpath_completion fastpath_t; |
1e9e2622 | 200 | |
2f728d66 SC |
201 | struct x86_emulate_ctxt; |
202 | struct x86_exception; | |
203 | enum x86_intercept; | |
204 | enum x86_intercept_stage; | |
2b3ccfa0 | 205 | |
42dbaa5a JK |
206 | #define KVM_NR_DB_REGS 4 |
207 | ||
e8ea85fb | 208 | #define DR6_BUS_LOCK (1 << 11) |
42dbaa5a JK |
209 | #define DR6_BD (1 << 13) |
210 | #define DR6_BS (1 << 14) | |
cfb634fe | 211 | #define DR6_BT (1 << 15) |
6f43ed01 | 212 | #define DR6_RTM (1 << 16) |
9a3ecd5e CQ |
213 | /* |
214 | * DR6_ACTIVE_LOW combines fixed-1 and active-low bits. | |
215 | * We can regard all the bits in DR6_FIXED_1 as active_low bits; | |
216 | * they will never be 0 for now, but when they are defined | |
217 | * in the future it will require no code change. | |
218 | * | |
219 | * DR6_ACTIVE_LOW is also used as the init/reset value for DR6. | |
220 | */ | |
221 | #define DR6_ACTIVE_LOW 0xffff0ff0 | |
e8ea85fb | 222 | #define DR6_VOLATILE 0x0001e80f |
9a3ecd5e | 223 | #define DR6_FIXED_1 (DR6_ACTIVE_LOW & ~DR6_VOLATILE) |
42dbaa5a JK |
224 | |
225 | #define DR7_BP_EN_MASK 0x000000ff | |
226 | #define DR7_GE (1 << 9) | |
227 | #define DR7_GD (1 << 13) | |
228 | #define DR7_FIXED_1 0x00000400 | |
6f43ed01 | 229 | #define DR7_VOLATILE 0xffff2bff |
42dbaa5a | 230 | |
7e582ccb ML |
231 | #define KVM_GUESTDBG_VALID_MASK \ |
232 | (KVM_GUESTDBG_ENABLE | \ | |
233 | KVM_GUESTDBG_SINGLESTEP | \ | |
234 | KVM_GUESTDBG_USE_HW_BP | \ | |
235 | KVM_GUESTDBG_USE_SW_BP | \ | |
236 | KVM_GUESTDBG_INJECT_BP | \ | |
61e5f69e ML |
237 | KVM_GUESTDBG_INJECT_DB | \ |
238 | KVM_GUESTDBG_BLOCKIRQ) | |
7e582ccb ML |
239 | |
240 | ||
c205fb7d NA |
241 | #define PFERR_PRESENT_BIT 0 |
242 | #define PFERR_WRITE_BIT 1 | |
243 | #define PFERR_USER_BIT 2 | |
244 | #define PFERR_RSVD_BIT 3 | |
245 | #define PFERR_FETCH_BIT 4 | |
be94f6b7 | 246 | #define PFERR_PK_BIT 5 |
00e7646c | 247 | #define PFERR_SGX_BIT 15 |
14727754 TL |
248 | #define PFERR_GUEST_FINAL_BIT 32 |
249 | #define PFERR_GUEST_PAGE_BIT 33 | |
c205fb7d NA |
250 | |
251 | #define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT) | |
252 | #define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT) | |
253 | #define PFERR_USER_MASK (1U << PFERR_USER_BIT) | |
254 | #define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT) | |
255 | #define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT) | |
be94f6b7 | 256 | #define PFERR_PK_MASK (1U << PFERR_PK_BIT) |
00e7646c | 257 | #define PFERR_SGX_MASK (1U << PFERR_SGX_BIT) |
14727754 TL |
258 | #define PFERR_GUEST_FINAL_MASK (1ULL << PFERR_GUEST_FINAL_BIT) |
259 | #define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT) | |
260 | ||
261 | #define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ | |
14727754 TL |
262 | PFERR_WRITE_MASK | \ |
263 | PFERR_PRESENT_MASK) | |
c205fb7d | 264 | |
41383771 GN |
265 | /* apic attention bits */ |
266 | #define KVM_APIC_CHECK_VAPIC 0 | |
ae7a2a3f MT |
267 | /* |
268 | * The following bit is set with PV-EOI, unset on EOI. | |
269 | * We detect PV-EOI changes by guest by comparing | |
270 | * this bit with PV-EOI in guest memory. | |
271 | * See the implementation in apic_update_pv_eoi. | |
272 | */ | |
273 | #define KVM_APIC_PV_EOI_PENDING 1 | |
41383771 | 274 | |
d84f1e07 FW |
275 | struct kvm_kernel_irq_routing_entry; |
276 | ||
21ebbeda | 277 | /* |
616007c8 SC |
278 | * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page |
279 | * also includes TDP pages) to determine whether or not a page can be used in | |
280 | * the given MMU context. This is a subset of the overall kvm_mmu_role to | |
281 | * minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating | |
282 | * 2 bytes per gfn instead of 4 bytes per gfn. | |
21ebbeda | 283 | * |
616007c8 SC |
284 | * Indirect upper-level shadow pages are tracked for write-protection via |
285 | * gfn_track. As above, gfn_track is a 16 bit counter, so KVM must not create | |
286 | * more than 2^16-1 upper-level shadow pages at a single gfn, otherwise | |
287 | * gfn_track will overflow and explosions will ensue. | |
288 | * | |
289 | * A unique shadow page (SP) for a gfn is created if and only if an existing SP | |
290 | * cannot be reused. The ability to reuse a SP is tracked by its role, which | |
291 | * incorporates various mode bits and properties of the SP. Roughly speaking, | |
292 | * the number of unique SPs that can theoretically be created is 2^n, where n | |
293 | * is the number of bits that are used to compute the role. | |
294 | * | |
295 | * But, even though there are 18 bits in the mask below, not all combinations | |
296 | * of modes and flags are possible. The maximum number of possible upper-level | |
297 | * shadow pages for a single gfn is in the neighborhood of 2^13. | |
298 | * | |
299 | * - invalid shadow pages are not accounted. | |
300 | * - level is effectively limited to four combinations, not 16 as the number of | |
301 | * bits would imply, as 4k SPs are not tracked (allowed to go unsync). | |
302 | * - level is effectively unused for non-PAE paging because there is exactly | |
303 | * one upper level (see 4k SP exception above). | |
304 | * - quadrant is used only for non-PAE paging and is exclusive with | |
305 | * gpte_is_8_bytes. | |
306 | * - execonly and ad_disabled are used only for nested EPT, which makes it | |
307 | * exclusive with quadrant. | |
21ebbeda | 308 | */ |
d657a98e | 309 | union kvm_mmu_page_role { |
36d9594d | 310 | u32 word; |
d657a98e | 311 | struct { |
7d76b4d3 | 312 | unsigned level:4; |
47c42e6b | 313 | unsigned gpte_is_8_bytes:1; |
7d76b4d3 | 314 | unsigned quadrant:2; |
f6e2c02b | 315 | unsigned direct:1; |
7d76b4d3 | 316 | unsigned access:3; |
2e53d63a | 317 | unsigned invalid:1; |
167f8a5c | 318 | unsigned efer_nx:1; |
3dbe1415 | 319 | unsigned cr0_wp:1; |
411c588d | 320 | unsigned smep_andnot_wp:1; |
0be0226f | 321 | unsigned smap_andnot_wp:1; |
ac8d57e5 | 322 | unsigned ad_disabled:1; |
1313cc2b JM |
323 | unsigned guest_mode:1; |
324 | unsigned :6; | |
699023e2 PB |
325 | |
326 | /* | |
327 | * This is left at the top of the word so that | |
328 | * kvm_memslots_for_spte_role can extract it with a | |
329 | * simple shift. While there is room, give it a whole | |
330 | * byte so it is also faster to load it from memory. | |
331 | */ | |
332 | unsigned smm:8; | |
d657a98e ZX |
333 | }; |
334 | }; | |
335 | ||
a336282d | 336 | /* |
616007c8 SC |
337 | * kvm_mmu_extended_role complements kvm_mmu_page_role, tracking properties |
338 | * relevant to the current MMU configuration. When loading CR0, CR4, or EFER, | |
339 | * including on nested transitions, if nothing in the full role changes then | |
340 | * MMU re-configuration can be skipped. @valid bit is set on first usage so we | |
341 | * don't treat all-zero structure as valid data. | |
342 | * | |
343 | * The properties that are tracked in the extended role but not the page role | |
344 | * are for things that either (a) do not affect the validity of the shadow page | |
345 | * or (b) are indirectly reflected in the shadow page's role. For example, | |
346 | * CR4.PKE only affects permission checks for software walks of the guest page | |
347 | * tables (because KVM doesn't support Protection Keys with shadow paging), and | |
348 | * CR0.PG, CR4.PAE, and CR4.PSE are indirectly reflected in role.level. | |
349 | * | |
350 | * Note, SMEP and SMAP are not redundant with sm*p_andnot_wp in the page role. | |
351 | * If CR0.WP=1, KVM can reuse shadow pages for the guest regardless of SMEP and | |
352 | * SMAP, but the MMU's permission checks for software walks need to be SMEP and | |
353 | * SMAP aware regardless of CR0.WP. | |
a336282d | 354 | */ |
616007c8 | 355 | union kvm_mmu_extended_role { |
36d9594d | 356 | u32 word; |
a336282d VK |
357 | struct { |
358 | unsigned int valid:1; | |
359 | unsigned int execonly:1; | |
7dcd5755 | 360 | unsigned int cr0_pg:1; |
0699c64a | 361 | unsigned int cr4_pae:1; |
a336282d VK |
362 | unsigned int cr4_pse:1; |
363 | unsigned int cr4_pke:1; | |
364 | unsigned int cr4_smap:1; | |
365 | unsigned int cr4_smep:1; | |
f71a53d1 | 366 | unsigned int cr4_la57:1; |
a336282d | 367 | }; |
36d9594d VK |
368 | }; |
369 | ||
370 | union kvm_mmu_role { | |
371 | u64 as_u64; | |
372 | struct { | |
373 | union kvm_mmu_page_role base; | |
374 | union kvm_mmu_extended_role ext; | |
375 | }; | |
376 | }; | |
377 | ||
018aabb5 TY |
378 | struct kvm_rmap_head { |
379 | unsigned long val; | |
380 | }; | |
381 | ||
1c08364c | 382 | struct kvm_pio_request { |
45def77e | 383 | unsigned long linear_rip; |
1c08364c | 384 | unsigned long count; |
1c08364c AK |
385 | int in; |
386 | int port; | |
387 | int size; | |
1c08364c AK |
388 | }; |
389 | ||
855feb67 | 390 | #define PT64_ROOT_MAX_LEVEL 5 |
2a7266a8 | 391 | |
a0a64f50 | 392 | struct rsvd_bits_validate { |
2a7266a8 | 393 | u64 rsvd_bits_mask[2][PT64_ROOT_MAX_LEVEL]; |
a0a64f50 XG |
394 | u64 bad_mt_xwr; |
395 | }; | |
396 | ||
7c390d35 | 397 | struct kvm_mmu_root_info { |
be01e8e2 | 398 | gpa_t pgd; |
7c390d35 JS |
399 | hpa_t hpa; |
400 | }; | |
401 | ||
402 | #define KVM_MMU_ROOT_INFO_INVALID \ | |
be01e8e2 | 403 | ((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE }) |
7c390d35 | 404 | |
b94742c9 JS |
405 | #define KVM_MMU_NUM_PREV_ROOTS 3 |
406 | ||
531810ca BG |
407 | #define KVM_HAVE_MMU_RWLOCK |
408 | ||
985ab278 SC |
409 | struct kvm_mmu_page; |
410 | ||
d657a98e | 411 | /* |
855feb67 YZ |
412 | * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit, |
413 | * and 2-level 32-bit). The kvm_mmu structure abstracts the details of the | |
414 | * current mmu mode. | |
d657a98e ZX |
415 | */ |
416 | struct kvm_mmu { | |
d8dd54e0 | 417 | unsigned long (*get_guest_pgd)(struct kvm_vcpu *vcpu); |
e4e517b4 | 418 | u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); |
736c291c | 419 | int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err, |
78b2c54a | 420 | bool prefault); |
6389ee94 AK |
421 | void (*inject_page_fault)(struct kvm_vcpu *vcpu, |
422 | struct x86_exception *fault); | |
736c291c SC |
423 | gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t gva_or_gpa, |
424 | u32 access, struct x86_exception *exception); | |
54987b7a PB |
425 | gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, |
426 | struct x86_exception *exception); | |
e8bc217a | 427 | int (*sync_page)(struct kvm_vcpu *vcpu, |
a4a8e6f7 | 428 | struct kvm_mmu_page *sp); |
7eb77e9f | 429 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa); |
d657a98e | 430 | hpa_t root_hpa; |
be01e8e2 | 431 | gpa_t root_pgd; |
36d9594d | 432 | union kvm_mmu_role mmu_role; |
ae1e2d10 PB |
433 | u8 root_level; |
434 | u8 shadow_root_level; | |
435 | u8 ept_ad; | |
c5a78f2b | 436 | bool direct_map; |
b94742c9 | 437 | struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS]; |
d657a98e | 438 | |
97d64b78 AK |
439 | /* |
440 | * Bitmap; bit set = permission fault | |
441 | * Byte index: page fault error code [4:1] | |
442 | * Bit index: pte permissions in ACC_* format | |
443 | */ | |
444 | u8 permissions[16]; | |
445 | ||
2d344105 HH |
446 | /* |
447 | * The pkru_mask indicates if protection key checks are needed. It | |
448 | * consists of 16 domains indexed by page fault error code bits [4:1], | |
449 | * with PFEC.RSVD replaced by ACC_USER_MASK from the page tables. | |
450 | * Each domain has 2 bits which are ANDed with AD and WD from PKRU. | |
451 | */ | |
452 | u32 pkru_mask; | |
453 | ||
d657a98e | 454 | u64 *pae_root; |
03ca4589 | 455 | u64 *pml4_root; |
cb0f722a | 456 | u64 *pml5_root; |
c258b62b XG |
457 | |
458 | /* | |
459 | * Check zero bits on shadow page table entries; these | |
460 | * bits include not only hardware reserved bits but also | |
461 | * the bits the spte never uses. | |
462 | */ | |
463 | struct rsvd_bits_validate shadow_zero_check; | |
464 | ||
a0a64f50 | 465 | struct rsvd_bits_validate guest_rsvd_check; |
ff03a073 JR |
466 | |
467 | u64 pdptrs[4]; /* pae */ | |
d657a98e ZX |
468 | }; |
469 | ||
a49b9635 LT |
470 | struct kvm_tlb_range { |
471 | u64 start_gfn; | |
472 | u64 pages; | |
473 | }; | |
474 | ||
f5132b01 GN |
475 | enum pmc_type { |
476 | KVM_PMC_GP = 0, | |
477 | KVM_PMC_FIXED, | |
478 | }; | |
479 | ||
480 | struct kvm_pmc { | |
481 | enum pmc_type type; | |
482 | u8 idx; | |
483 | u64 counter; | |
484 | u64 eventsel; | |
485 | struct perf_event *perf_event; | |
486 | struct kvm_vcpu *vcpu; | |
a6da0d77 LX |
487 | /* |
488 | * eventsel value for general purpose counters, | |
489 | * ctrl value for fixed counters. | |
490 | */ | |
491 | u64 current_config; | |
e79f49c3 | 492 | bool is_paused; |
f5132b01 GN |
493 | }; |
494 | ||
495 | struct kvm_pmu { | |
496 | unsigned nr_arch_gp_counters; | |
497 | unsigned nr_arch_fixed_counters; | |
498 | unsigned available_event_types; | |
499 | u64 fixed_ctr_ctrl; | |
500 | u64 global_ctrl; | |
501 | u64 global_status; | |
502 | u64 global_ovf_ctrl; | |
503 | u64 counter_bitmask[2]; | |
504 | u64 global_ctrl_mask; | |
c715eb9f | 505 | u64 global_ovf_ctrl_mask; |
103af0a9 | 506 | u64 reserved_bits; |
f5132b01 | 507 | u8 version; |
15c7ad51 RR |
508 | struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; |
509 | struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED]; | |
f5132b01 | 510 | struct irq_work irq_work; |
4be94672 | 511 | DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX); |
b35e5548 LX |
512 | DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX); |
513 | DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX); | |
514 | ||
515 | /* | |
516 | * The gate to release perf_events not marked in | |
517 | * pmc_in_use only once in a vcpu time slice. | |
518 | */ | |
519 | bool need_cleanup; | |
520 | ||
521 | /* | |
522 | * The total number of programmed perf_events; it helps to avoid | |
523 | * redundant checks before cleanup if the guest doesn't use vPMU at all. | |
524 | */ | |
525 | u8 event_count; | |
f5132b01 GN |
526 | }; |
527 | ||
25462f7f WH |
528 | struct kvm_pmu_ops; |
529 | ||
360b948d PB |
530 | enum { |
531 | KVM_DEBUGREG_BP_ENABLED = 1, | |
c77fb5fe | 532 | KVM_DEBUGREG_WONT_EXIT = 2, |
360b948d PB |
533 | }; |
534 | ||
86fd5270 XG |
535 | struct kvm_mtrr_range { |
536 | u64 base; | |
537 | u64 mask; | |
19efffa2 | 538 | struct list_head node; |
86fd5270 XG |
539 | }; |
540 | ||
70109e7d | 541 | struct kvm_mtrr { |
86fd5270 | 542 | struct kvm_mtrr_range var_ranges[KVM_NR_VAR_MTRR]; |
70109e7d | 543 | mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION]; |
10fac2dc | 544 | u64 deftype; |
19efffa2 XG |
545 | |
546 | struct list_head head; | |
70109e7d XG |
547 | }; |
548 | ||
1f4b34f8 AS |
549 | /* Hyper-V SynIC timer */ |
550 | struct kvm_vcpu_hv_stimer { | |
551 | struct hrtimer timer; | |
552 | int index; | |
6a058a1e | 553 | union hv_stimer_config config; |
1f4b34f8 AS |
554 | u64 count; |
555 | u64 exp_time; | |
556 | struct hv_message msg; | |
557 | bool msg_pending; | |
558 | }; | |
559 | ||
5c919412 AS |
560 | /* Hyper-V synthetic interrupt controller (SynIC)*/ |
561 | struct kvm_vcpu_hv_synic { | |
562 | u64 version; | |
563 | u64 control; | |
564 | u64 msg_page; | |
565 | u64 evt_page; | |
566 | atomic64_t sint[HV_SYNIC_SINT_COUNT]; | |
567 | atomic_t sint_to_gsi[HV_SYNIC_SINT_COUNT]; | |
568 | DECLARE_BITMAP(auto_eoi_bitmap, 256); | |
569 | DECLARE_BITMAP(vec_bitmap, 256); | |
570 | bool active; | |
efc479e6 | 571 | bool dont_zero_synic_pages; |
5c919412 AS |
572 | }; |
573 | ||
e83d5887 AS |
574 | /* Hyper-V per vcpu emulation context */ |
575 | struct kvm_vcpu_hv { | |
4592b7ea | 576 | struct kvm_vcpu *vcpu; |
d3457c87 | 577 | u32 vp_index; |
e83d5887 | 578 | u64 hv_vapic; |
9eec50b8 | 579 | s64 runtime_offset; |
5c919412 | 580 | struct kvm_vcpu_hv_synic synic; |
db397571 | 581 | struct kvm_hyperv_exit exit; |
1f4b34f8 AS |
582 | struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT]; |
583 | DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); | |
e6b6c483 | 584 | cpumask_t tlb_flush; |
644f7067 | 585 | bool enforce_cpuid; |
10d7bf1e VK |
586 | struct { |
587 | u32 features_eax; /* HYPERV_CPUID_FEATURES.EAX */ | |
588 | u32 features_ebx; /* HYPERV_CPUID_FEATURES.EBX */ | |
589 | u32 features_edx; /* HYPERV_CPUID_FEATURES.EDX */ | |
590 | u32 enlightenments_eax; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */ | |
591 | u32 enlightenments_ebx; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EBX */ | |
592 | u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */ | |
593 | } cpuid_cache; | |
e83d5887 AS |
594 | }; |
595 | ||
23200b7a JM |
596 | /* Xen HVM per vcpu emulation context */ |
597 | struct kvm_vcpu_xen { | |
598 | u64 hypercall_rip; | |
30b5c851 | 599 | u32 current_runstate; |
73e69a86 | 600 | bool vcpu_info_set; |
f2340cd9 | 601 | bool vcpu_time_info_set; |
30b5c851 | 602 | bool runstate_set; |
73e69a86 | 603 | struct gfn_to_hva_cache vcpu_info_cache; |
f2340cd9 | 604 | struct gfn_to_hva_cache vcpu_time_info_cache; |
30b5c851 DW |
605 | struct gfn_to_hva_cache runstate_cache; |
606 | u64 last_steal; | |
607 | u64 runstate_entry_time; | |
608 | u64 runstate_times[4]; | |
23200b7a JM |
609 | }; |
610 | ||
ad312c7c | 611 | struct kvm_vcpu_arch { |
5fdbf976 MT |
612 | /* |
613 | * rip and regs accesses must go through | |
614 | * kvm_{register,rip}_{read,write} functions. | |
615 | */ | |
616 | unsigned long regs[NR_VCPU_REGS]; | |
617 | u32 regs_avail; | |
618 | u32 regs_dirty; | |
34c16eec ZX |
619 | |
620 | unsigned long cr0; | |
e8467fda | 621 | unsigned long cr0_guest_owned_bits; |
34c16eec ZX |
622 | unsigned long cr2; |
623 | unsigned long cr3; | |
624 | unsigned long cr4; | |
fc78f519 | 625 | unsigned long cr4_guest_owned_bits; |
b899c132 | 626 | unsigned long cr4_guest_rsvd_bits; |
34c16eec | 627 | unsigned long cr8; |
37486135 | 628 | u32 host_pkru; |
b9dd21e1 | 629 | u32 pkru; |
1371d904 | 630 | u32 hflags; |
f6801dff | 631 | u64 efer; |
34c16eec ZX |
632 | u64 apic_base; |
633 | struct kvm_lapic *apic; /* kernel irqchip context */ | |
d62caabb | 634 | bool apicv_active; |
e40ff1d6 | 635 | bool load_eoi_exitmap_pending; |
6308630b | 636 | DECLARE_BITMAP(ioapic_handled_vectors, 256); |
41383771 | 637 | unsigned long apic_attention; |
e1035715 | 638 | int32_t apic_arb_prio; |
34c16eec | 639 | int mp_state; |
34c16eec | 640 | u64 ia32_misc_enable_msr; |
64d60670 | 641 | u64 smbase; |
52797bf9 | 642 | u64 smi_count; |
b209749f | 643 | bool tpr_access_reporting; |
7204160e | 644 | bool xsaves_enabled; |
20300099 | 645 | u64 ia32_xss; |
518e7b94 | 646 | u64 microcode_version; |
0cf9135b | 647 | u64 arch_capabilities; |
27461da3 | 648 | u64 perf_capabilities; |
34c16eec | 649 | |
14dfe855 JR |
650 | /* |
651 | * Paging state of the vcpu | |
652 | * | |
653 | * If the vcpu runs in guest mode with two level paging this still saves | |
654 | * the paging mode of the l1 guest. This context is always used to | |
655 | * handle faults. | |
656 | */ | |
44dd3ffa VK |
657 | struct kvm_mmu *mmu; |
658 | ||
659 | /* Non-nested MMU for L1 */ | |
660 | struct kvm_mmu root_mmu; | |
8df25a32 | 661 | |
14c07ad8 VK |
662 | /* L1 MMU when running nested */ |
663 | struct kvm_mmu guest_mmu; | |
664 | ||
6539e738 JR |
665 | /* |
666 | * Paging state of an L2 guest (used for nested npt) | |
667 | * | |
668 | * This context will save all necessary information to walk page tables | |
311497e0 | 669 | * of an L2 guest. This context is only initialized for page table |
6539e738 JR |
670 | * walking and not for faulting since we never handle l2 page faults on |
671 | * the host. | |
672 | */ | |
673 | struct kvm_mmu nested_mmu; | |
674 | ||
14dfe855 JR |
675 | /* |
676 | * Pointer to the mmu context currently used for | |
677 | * gva_to_gpa translations. | |
678 | */ | |
679 | struct kvm_mmu *walk_mmu; | |
680 | ||
53c07b18 | 681 | struct kvm_mmu_memory_cache mmu_pte_list_desc_cache; |
171a90d7 SC |
682 | struct kvm_mmu_memory_cache mmu_shadow_page_cache; |
683 | struct kvm_mmu_memory_cache mmu_gfn_array_cache; | |
34c16eec ZX |
684 | struct kvm_mmu_memory_cache mmu_page_header_cache; |
685 | ||
f775b13e RR |
686 | /* |
687 | * QEMU userspace and the guest each have their own FPU state. | |
ec269475 PB |
688 | * In vcpu_run, we switch between the user and guest FPU contexts. |
689 | * While running a VCPU, the VCPU thread will have the guest FPU | |
690 | * context. | |
f775b13e RR |
691 | * |
692 | * Note that while the PKRU state lives inside the fpu registers, | |
693 | * it is switched out separately at VMENTER and VMEXIT time. The | |
694 | * "guest_fpu" state here contains the guest FPU context, with the | |
695 | * host PKRU bits. | |
696 | */ | |
d9a710e5 | 697 | struct fpu *user_fpu; |
b666a4b6 | 698 | struct fpu *guest_fpu; |
f775b13e | 699 | |
2acf923e | 700 | u64 xcr0; |
d7876f1b | 701 | u64 guest_supported_xcr0; |
34c16eec | 702 | |
34c16eec ZX |
703 | struct kvm_pio_request pio; |
704 | void *pio_data; | |
b5998402 | 705 | void *sev_pio_data; |
34c16eec | 706 | |
66fd3f7f GN |
707 | u8 event_exit_inst_len; |
708 | ||
298101da AK |
709 | struct kvm_queued_exception { |
710 | bool pending; | |
664f8e26 | 711 | bool injected; |
298101da AK |
712 | bool has_error_code; |
713 | u8 nr; | |
714 | u32 error_code; | |
c851436a JM |
715 | unsigned long payload; |
716 | bool has_payload; | |
adfe20fb | 717 | u8 nested_apf; |
298101da AK |
718 | } exception; |
719 | ||
937a7eae | 720 | struct kvm_queued_interrupt { |
04140b41 | 721 | bool injected; |
66fd3f7f | 722 | bool soft; |
937a7eae AK |
723 | u8 nr; |
724 | } interrupt; | |
725 | ||
34c16eec ZX |
726 | int halt_request; /* real mode on Intel only */ |
727 | ||
728 | int cpuid_nent; | |
255cbecf | 729 | struct kvm_cpuid_entry2 *cpuid_entries; |
5a4f55cd | 730 | |
ca29e145 | 731 | u64 reserved_gpa_bits; |
5a4f55cd EK |
732 | int maxphyaddr; |
733 | ||
34c16eec ZX |
734 | /* emulate context */ |
735 | ||
c9b8b07c | 736 | struct x86_emulate_ctxt *emulate_ctxt; |
7ae441ea GN |
737 | bool emulate_regs_need_sync_to_vcpu; |
738 | bool emulate_regs_need_sync_from_vcpu; | |
716d51ab | 739 | int (*complete_userspace_io)(struct kvm_vcpu *vcpu); |
18068523 GOC |
740 | |
741 | gpa_t time; | |
50d0a0f9 | 742 | struct pvclock_vcpu_time_info hv_clock; |
e48672fa | 743 | unsigned int hw_tsc_khz; |
0b79459b AH |
744 | struct gfn_to_hva_cache pv_time; |
745 | bool pv_time_enabled; | |
51d59c6b MT |
746 | /* set guest stopped flag in pvclock flags field */ |
747 | bool pvclock_set_guest_stopped_request; | |
c9aaa895 GC |
748 | |
749 | struct { | |
a6bd811f | 750 | u8 preempted; |
c9aaa895 GC |
751 | u64 msr_val; |
752 | u64 last_steal; | |
91724814 | 753 | struct gfn_to_pfn_cache cache; |
c9aaa895 GC |
754 | } st; |
755 | ||
56ba77a4 | 756 | u64 l1_tsc_offset; |
805d705f | 757 | u64 tsc_offset; /* current tsc offset */ |
1d5f066e | 758 | u64 last_guest_tsc; |
6f526ec5 | 759 | u64 last_host_tsc; |
0dd6a6ed | 760 | u64 tsc_offset_adjustment; |
e26101b1 ZA |
761 | u64 this_tsc_nsec; |
762 | u64 this_tsc_write; | |
0d3da0d2 | 763 | u64 this_tsc_generation; |
c285545f | 764 | bool tsc_catchup; |
cc578287 ZA |
765 | bool tsc_always_catchup; |
766 | s8 virtual_tsc_shift; | |
767 | u32 virtual_tsc_mult; | |
768 | u32 virtual_tsc_khz; | |
ba904635 | 769 | s64 ia32_tsc_adjust_msr; |
73f624f4 | 770 | u64 msr_ia32_power_ctl; |
805d705f IS |
771 | u64 l1_tsc_scaling_ratio; |
772 | u64 tsc_scaling_ratio; /* current scaling ratio */ | |
3419ffc8 | 773 | |
7460fb4a AK |
774 | atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ |
775 | unsigned nmi_pending; /* NMI queued after currently running handler */ | |
776 | bool nmi_injected; /* Trying to inject an NMI this entry */ | |
f077825a | 777 | bool smi_pending; /* SMI queued after currently running handler */ |
9ba075a6 | 778 | |
70109e7d | 779 | struct kvm_mtrr mtrr_state; |
7cb060a9 | 780 | u64 pat; |
42dbaa5a | 781 | |
360b948d | 782 | unsigned switch_db_regs; |
42dbaa5a JK |
783 | unsigned long db[KVM_NR_DB_REGS]; |
784 | unsigned long dr6; | |
785 | unsigned long dr7; | |
786 | unsigned long eff_db[KVM_NR_DB_REGS]; | |
c8639010 | 787 | unsigned long guest_debug_dr7; |
db2336a8 KH |
788 | u64 msr_platform_info; |
789 | u64 msr_misc_features_enables; | |
890ca9ae HY |
790 | |
791 | u64 mcg_cap; | |
792 | u64 mcg_status; | |
793 | u64 mcg_ctl; | |
c45dcc71 | 794 | u64 mcg_ext_ctl; |
890ca9ae | 795 | u64 *mce_banks; |
94fe45da | 796 | |
bebb106a XG |
797 | /* Cache MMIO info */ |
798 | u64 mmio_gva; | |
871bd034 | 799 | unsigned mmio_access; |
bebb106a | 800 | gfn_t mmio_gfn; |
56f17dd3 | 801 | u64 mmio_gen; |
bebb106a | 802 | |
f5132b01 GN |
803 | struct kvm_pmu pmu; |
804 | ||
94fe45da | 805 | /* used for guest single stepping over the given code position */ |
94fe45da | 806 | unsigned long singlestep_rip; |
f92653ee | 807 | |
8f014550 | 808 | bool hyperv_enabled; |
4592b7ea | 809 | struct kvm_vcpu_hv *hyperv; |
23200b7a | 810 | struct kvm_vcpu_xen xen; |
f5f48ee1 SY |
811 | |
812 | cpumask_var_t wbinvd_dirty_mask; | |
af585b92 | 813 | |
1cb3f3ae XG |
814 | unsigned long last_retry_eip; |
815 | unsigned long last_retry_addr; | |
816 | ||
af585b92 GN |
817 | struct { |
818 | bool halted; | |
dd03bcaa | 819 | gfn_t gfns[ASYNC_PF_PER_VCPU]; |
344d9588 | 820 | struct gfn_to_hva_cache data; |
2635b5c4 VK |
821 | u64 msr_en_val; /* MSR_KVM_ASYNC_PF_EN */ |
822 | u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */ | |
823 | u16 vec; | |
7c90705b | 824 | u32 id; |
6adba527 | 825 | bool send_user_only; |
68fd66f1 | 826 | u32 host_apf_flags; |
adfe20fb | 827 | unsigned long nested_apf_token; |
52a5c155 | 828 | bool delivery_as_pf_vmexit; |
557a961a | 829 | bool pageready_pending; |
af585b92 | 830 | } apf; |
2b036c6b BO |
831 | |
832 | /* OSVW MSRs (AMD only) */ | |
833 | struct { | |
834 | u64 length; | |
835 | u64 status; | |
836 | } osvw; | |
ae7a2a3f MT |
837 | |
838 | struct { | |
839 | u64 msr_val; | |
840 | struct gfn_to_hva_cache data; | |
841 | } pv_eoi; | |
93c05d3e | 842 | |
2d5ba19b MT |
843 | u64 msr_kvm_poll_control; |
844 | ||
93c05d3e | 845 | /* |
ffdbd50d ML |
846 | * Indicates the guest is trying to write a gfn that contains one or |
847 | * more of the PTEs used to translate the write itself, i.e. the access | |
848 | * is changing its own translation in the guest page tables. KVM exits | |
849 | * to userspace if emulation of the faulting instruction fails and this | |
850 | * flag is set, as KVM cannot make forward progress. | |
851 | * | |
852 | * If emulation fails for a write to guest page tables, KVM unprotects | |
853 | * (zaps) the shadow page for the target gfn and resumes the guest to | |
854 | * retry the non-emulatable instruction (on hardware). Unprotecting the | |
855 | * gfn doesn't allow forward progress for a self-changing access because | |
856 | * doing so also zaps the translation for the gfn, i.e. retrying the | |
857 | * instruction will hit a !PRESENT fault, which results in a new shadow | |
858 | * page and sends KVM back to square one. | |
93c05d3e XG |
859 | */ |
860 | bool write_fault_to_shadow_pgtable; | |
25d92081 YZ |
861 | |
862 | /* set at EPT violation at this point */ | |
863 | unsigned long exit_qualification; | |
6aef266c SV |
864 | |
865 | /* pv related host specific info */ | |
866 | struct { | |
867 | bool pv_unhalted; | |
868 | } pv; | |
7543a635 SR |
869 | |
870 | int pending_ioapic_eoi; | |
1c1a9ce9 | 871 | int pending_external_vector; |
0f89b207 | 872 | |
de63ad4c LM |
873 | /* true if the vCPU was preempted while in kernel mode (cpl=0) */ |
874 | bool preempted_in_kernel; | |
c595ceee PB |
875 | |
876 | /* Flush the L1 Data cache for L1TF mitigation on VMENTER */ | |
877 | bool l1tf_flush_l1d; | |
191c8137 | 878 | |
8a14fe4f | 879 | /* Host CPU on which VM-entry was most recently attempted */ |
63f5a190 | 880 | int last_vmentry_cpu; |
8a14fe4f | 881 | |
191c8137 BP |
882 | /* AMD MSRC001_0015 Hardware Configuration */ |
883 | u64 msr_hwcr; | |
66570e96 OU |
884 | |
885 | /* pv related cpuid info */ | |
886 | struct { | |
887 | /* | |
888 | * value of the eax register in the KVM_CPUID_FEATURES CPUID | |
889 | * leaf. | |
890 | */ | |
891 | u32 features; | |
892 | ||
893 | /* | |
894 | * indicates whether pv emulation should be disabled if features | |
895 | * are not present in the guest's cpuid | |
896 | */ | |
897 | bool enforce; | |
898 | } pv_cpuid; | |
add5e2f0 TL |
899 | |
900 | /* Protected Guests */ | |
901 | bool guest_state_protected; | |
3c86c0d3 | 902 | |
158a48ec ML |
903 | /* |
904 | * Set when PDPTRs were loaded directly by userspace without |
905 | * reading the guest memory | |
906 | */ | |
907 | bool pdptrs_from_userspace; | |
908 | ||
3c86c0d3 VP |
909 | #if IS_ENABLED(CONFIG_HYPERV) |
910 | hpa_t hv_root_tdp; | |
911 | #endif | |
34c16eec ZX |
912 | }; |
913 | ||
db3fe4eb | 914 | struct kvm_lpage_info { |
92f94f1e | 915 | int disallow_lpage; |
db3fe4eb TY |
916 | }; |
917 | ||
918 | struct kvm_arch_memory_slot { | |
018aabb5 | 919 | struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES]; |
db3fe4eb | 920 | struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; |
21ebbeda | 921 | unsigned short *gfn_track[KVM_PAGE_TRACK_MAX]; |
db3fe4eb TY |
922 | }; |
923 | ||
3548a259 RK |
924 | /* |
925 | * We use as the mode the number of bits allocated in the LDR for the | |
926 | * logical processor ID. It happens that these are all powers of two. | |
927 | * This makes it very easy to detect cases where the APICs are |
928 | * configured for multiple modes; in that case, we cannot use the map and | |
929 | * hence cannot use kvm_irq_delivery_to_apic_fast either. | |
930 | */ | |
931 | #define KVM_APIC_MODE_XAPIC_CLUSTER 4 | |
932 | #define KVM_APIC_MODE_XAPIC_FLAT 8 | |
933 | #define KVM_APIC_MODE_X2APIC 16 | |
934 | ||
1e08ec4a GN |
935 | struct kvm_apic_map { |
936 | struct rcu_head rcu; | |
3548a259 | 937 | u8 mode; |
0ca52e7b | 938 | u32 max_apic_id; |
e45115b6 RK |
939 | union { |
940 | struct kvm_lapic *xapic_flat_map[8]; | |
941 | struct kvm_lapic *xapic_cluster_map[16][4]; | |
942 | }; | |
0ca52e7b | 943 | struct kvm_lapic *phys_map[]; |
1e08ec4a GN |
944 | }; |
945 | ||
f97f5a56 JD |
946 | /* Hyper-V synthetic debugger (SynDbg)*/ |
947 | struct kvm_hv_syndbg { | |
948 | struct { | |
949 | u64 control; | |
950 | u64 status; | |
951 | u64 send_page; | |
952 | u64 recv_page; | |
953 | u64 pending_page; | |
954 | } control; | |
955 | u64 options; | |
956 | }; | |
957 | ||
cc9cfddb VK |
958 | /* Current state of Hyper-V TSC page clocksource */ |
959 | enum hv_tsc_page_status { | |
960 | /* TSC page was not set up or disabled */ | |
961 | HV_TSC_PAGE_UNSET = 0, | |
962 | /* TSC page MSR was written by the guest, update pending */ | |
963 | HV_TSC_PAGE_GUEST_CHANGED, | |
964 | /* TSC page MSR was written by KVM userspace, update pending */ | |
965 | HV_TSC_PAGE_HOST_CHANGED, | |
966 | /* TSC page was properly set up and is currently active */ | |
967 | HV_TSC_PAGE_SET, | |
968 | /* TSC page is currently being updated and therefore is inactive */ | |
969 | HV_TSC_PAGE_UPDATING, | |
970 | /* TSC page was set up with an inaccessible GPA */ | |
971 | HV_TSC_PAGE_BROKEN, | |
972 | }; | |
973 | ||
e83d5887 AS |
974 | /* Hyper-V emulation context */ |
975 | struct kvm_hv { | |
3f5ad8be | 976 | struct mutex hv_lock; |
e83d5887 AS |
977 | u64 hv_guest_os_id; |
978 | u64 hv_hypercall; | |
979 | u64 hv_tsc_page; | |
cc9cfddb | 980 | enum hv_tsc_page_status hv_tsc_page_status; |
e7d9513b AS |
981 | |
982 | /* Hyper-V based guest crash (NT kernel bugcheck) parameters */ |
983 | u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS]; | |
984 | u64 hv_crash_ctl; | |
095cf55d | 985 | |
7357b1df | 986 | struct ms_hyperv_tsc_page tsc_ref; |
faeb7833 RK |
987 | |
988 | struct idr conn_to_evt; | |
a2e164e7 VK |
989 | |
990 | u64 hv_reenlightenment_control; | |
991 | u64 hv_tsc_emulation_control; | |
992 | u64 hv_tsc_emulation_status; | |
87ee613d VK |
993 | |
994 | /* How many vCPUs have VP index != vCPU index */ | |
995 | atomic_t num_mismatched_vp_indexes; | |
6f6a657c | 996 | |
0f250a64 VK |
997 | /* |
998 | * How many SynICs use 'AutoEOI' feature | |
999 | * (protected by arch.apicv_update_lock) | |
1000 | */ | |
1001 | unsigned int synic_auto_eoi_used; | |
1002 | ||
6f6a657c | 1003 | struct hv_partition_assist_pg *hv_pa_pg; |
f97f5a56 | 1004 | struct kvm_hv_syndbg hv_syndbg; |
e83d5887 AS |
1005 | }; |
1006 | ||
1a155254 AG |
1007 | struct msr_bitmap_range { |
1008 | u32 flags; | |
1009 | u32 nmsrs; | |
1010 | u32 base; | |
1011 | unsigned long *bitmap; | |
1012 | }; | |
1013 | ||
a3833b81 DW |
1014 | /* Xen emulation context */ |
1015 | struct kvm_xen { | |
1016 | bool long_mode; | |
40da8ccd | 1017 | u8 upcall_vector; |
319afe68 | 1018 | gfn_t shinfo_gfn; |
a3833b81 DW |
1019 | }; |
1020 | ||
49776faf RK |
1021 | enum kvm_irqchip_mode { |
1022 | KVM_IRQCHIP_NONE, | |
1023 | KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */ | |
1024 | KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ | |
1025 | }; | |
1026 | ||
b318e8de SC |
1027 | struct kvm_x86_msr_filter { |
1028 | u8 count; | |
1029 | bool default_allow:1; | |
1030 | struct msr_bitmap_range ranges[16]; | |
1031 | }; | |
1032 | ||
4e19c36f | 1033 | #define APICV_INHIBIT_REASON_DISABLE 0 |
f4fdc0a2 | 1034 | #define APICV_INHIBIT_REASON_HYPERV 1 |
9a0bf054 | 1035 | #define APICV_INHIBIT_REASON_NESTED 2 |
f3515dc3 | 1036 | #define APICV_INHIBIT_REASON_IRQWIN 3 |
e2ed4078 | 1037 | #define APICV_INHIBIT_REASON_PIT_REINJ 4 |
cc7f5577 | 1038 | #define APICV_INHIBIT_REASON_X2APIC 5 |
4e19c36f | 1039 | |
fef9cce0 | 1040 | struct kvm_arch { |
bc8a3d89 BG |
1041 | unsigned long n_used_mmu_pages; |
1042 | unsigned long n_requested_mmu_pages; | |
1043 | unsigned long n_max_mmu_pages; | |
332b207d | 1044 | unsigned int indirect_shadow_pages; |
ca333add | 1045 | u8 mmu_valid_gen; |
f05e70ac | 1046 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; |
f05e70ac | 1047 | struct list_head active_mmu_pages; |
31741eb1 | 1048 | struct list_head zapped_obsolete_pages; |
1aa9b957 | 1049 | struct list_head lpage_disallowed_mmu_pages; |
13d268ca | 1050 | struct kvm_page_track_notifier_node mmu_sp_tracker; |
0eb05bf2 | 1051 | struct kvm_page_track_notifier_head track_notifier_head; |
ce25681d SC |
1052 | /* |
1053 | * Protects marking pages unsync during page faults, as TDP MMU page | |
1054 | * faults only take mmu_lock for read. For simplicity, the unsync | |
1055 | * pages lock is always taken when marking pages unsync regardless of | |
1056 | * whether mmu_lock is held for read or write. | |
1057 | */ | |
1058 | spinlock_t mmu_unsync_pages_lock; | |
365c8868 | 1059 | |
4d5c5d0f | 1060 | struct list_head assigned_dev_head; |
19de40a8 | 1061 | struct iommu_domain *iommu_domain; |
d96eb2c6 | 1062 | bool iommu_noncoherent; |
e0f0bbc5 AW |
1063 | #define __KVM_HAVE_ARCH_NONCOHERENT_DMA |
1064 | atomic_t noncoherent_dma_count; | |
5544eb9b PB |
1065 | #define __KVM_HAVE_ARCH_ASSIGNED_DEVICE |
1066 | atomic_t assigned_device_count; | |
d7deeeb0 ZX |
1067 | struct kvm_pic *vpic; |
1068 | struct kvm_ioapic *vioapic; | |
7837699f | 1069 | struct kvm_pit *vpit; |
42720138 | 1070 | atomic_t vapics_in_nmi_mode; |
1e08ec4a | 1071 | struct mutex apic_map_lock; |
6fcd9cbc | 1072 | struct kvm_apic_map __rcu *apic_map; |
44d52717 | 1073 | atomic_t apic_map_dirty; |
bfc6d222 | 1074 | |
b0a1637f ML |
1075 | /* Protects apic_access_memslot_enabled and apicv_inhibit_reasons */ |
1076 | struct mutex apicv_update_lock; | |
1077 | ||
a01b45e9 | 1078 | bool apic_access_memslot_enabled; |
4e19c36f | 1079 | unsigned long apicv_inhibit_reasons; |
18068523 GOC |
1080 | |
1081 | gpa_t wall_clock; | |
b7ebfb05 | 1082 | |
4d5422ce | 1083 | bool mwait_in_guest; |
caa057a2 | 1084 | bool hlt_in_guest; |
b31c114b | 1085 | bool pause_in_guest; |
b5170063 | 1086 | bool cstate_in_guest; |
4d5422ce | 1087 | |
5550af4d | 1088 | unsigned long irq_sources_bitmap; |
afbcf7ab | 1089 | s64 kvmclock_offset; |
038f8c11 | 1090 | raw_spinlock_t tsc_write_lock; |
f38e098f | 1091 | u64 last_tsc_nsec; |
f38e098f | 1092 | u64 last_tsc_write; |
5d3cb0f6 | 1093 | u32 last_tsc_khz; |
e26101b1 ZA |
1094 | u64 cur_tsc_nsec; |
1095 | u64 cur_tsc_write; | |
1096 | u64 cur_tsc_offset; | |
0d3da0d2 | 1097 | u64 cur_tsc_generation; |
b48aa97e | 1098 | int nr_vcpus_matched_tsc; |
ffde22ac | 1099 | |
d828199e MT |
1100 | spinlock_t pvclock_gtod_sync_lock; |
1101 | bool use_master_clock; | |
1102 | u64 master_kernel_ns; | |
a5a1d1c2 | 1103 | u64 master_cycle_now; |
7e44e449 | 1104 | struct delayed_work kvmclock_update_work; |
332967a3 | 1105 | struct delayed_work kvmclock_sync_work; |
d828199e | 1106 | |
ffde22ac | 1107 | struct kvm_xen_hvm_config xen_hvm_config; |
55cd8e5a | 1108 | |
6ef768fa PB |
1109 | /* reads protected by irq_srcu, writes by irq_lock */ |
1110 | struct hlist_head mask_notifier_list; | |
1111 | ||
e83d5887 | 1112 | struct kvm_hv hyperv; |
a3833b81 | 1113 | struct kvm_xen xen; |
b034cf01 XG |
1114 | |
1115 | #ifdef CONFIG_KVM_MMU_AUDIT | |
1116 | int audit_point; | |
1117 | #endif | |
54750f2c | 1118 | |
a826faf1 | 1119 | bool backwards_tsc_observed; |
54750f2c | 1120 | bool boot_vcpu_runs_old_kvmclock; |
d71ba788 | 1121 | u32 bsp_vcpu_id; |
90de4a18 NA |
1122 | |
1123 | u64 disabled_quirks; | |
a85863c2 | 1124 | int cpu_dirty_logging_count; |
49df6397 | 1125 | |
49776faf | 1126 | enum kvm_irqchip_mode irqchip_mode; |
b053b2ae | 1127 | u8 nr_reserved_ioapic_pins; |
52004014 FW |
1128 | |
1129 | bool disabled_lapic_found; | |
44a95dae | 1130 | |
37131313 | 1131 | bool x2apic_format; |
c519265f | 1132 | bool x2apic_broadcast_quirk_disabled; |
6fbbde9a DS |
1133 | |
1134 | bool guest_can_read_msr_platform_info; | |
59073aaf | 1135 | bool exception_payload_enabled; |
66bb8a06 | 1136 | |
b318e8de | 1137 | bool bus_lock_detection_enabled; |
19238e75 AL |
1138 | /* |
1139 | * If exit_on_emulation_error is set, and the in-kernel instruction | |
1140 | * emulator fails to emulate an instruction, allow userspace | |
1141 | * the opportunity to look at it. | |
1142 | */ | |
1143 | bool exit_on_emulation_error; | |
b318e8de | 1144 | |
1ae09954 AG |
1145 | /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */ |
1146 | u32 user_space_msr_mask; | |
b318e8de | 1147 | struct kvm_x86_msr_filter __rcu *msr_filter; |
fe6b6bc8 | 1148 | |
0dbb1123 AK |
1149 | u32 hypercall_exit_enabled; |
1150 | ||
70210c04 SC |
1151 | /* Guest can access the SGX PROVISIONKEY. */ |
1152 | bool sgx_provisioning_allowed; | |
1153 | ||
6fcd9cbc | 1154 | struct kvm_pmu_event_filter __rcu *pmu_event_filter; |
1aa9b957 | 1155 | struct task_struct *nx_lpage_recovery_thread; |
fe5db27d | 1156 | |
897218ff | 1157 | #ifdef CONFIG_X86_64 |
fe5db27d BG |
1158 | /* |
1159 | * Whether the TDP MMU is enabled for this VM. This contains a | |
1160 | * snapshot of the TDP MMU module parameter from when the VM was | |
1161 | * created and remains unchanged for the life of the VM. If this is | |
1162 | * true, TDP MMU handler functions will run for various MMU | |
1163 | * operations. | |
1164 | */ | |
1165 | bool tdp_mmu_enabled; | |
89c0fd49 | 1166 | |
c0dba6e4 | 1167 | /* |
c0e64238 | 1168 | * List of struct kvm_mmu_pages being used as roots. |
c0dba6e4 BG |
1169 | * All struct kvm_mmu_pages in the list should have |
1170 | * tdp_mmu_page set. | |
c0e64238 BG |
1171 | * |
1172 | * For reads, this list is protected by: | |
1173 | * the MMU lock in read mode + RCU or | |
1174 | * the MMU lock in write mode | |
1175 | * | |
1176 | * For writes, this list is protected by: | |
1177 | * the MMU lock in read mode + the tdp_mmu_pages_lock or | |
1178 | * the MMU lock in write mode | |
1179 | * | |
1180 | * Roots will remain in the list until their tdp_mmu_root_count | |
1181 | * drops to zero, at which point the thread that decremented the | |
1182 | * count to zero should remove the root from the list and clean |
1183 | * it up, freeing the root after an RCU grace period. | |
c0dba6e4 | 1184 | */ |
02c00b3a | 1185 | struct list_head tdp_mmu_roots; |
c0dba6e4 BG |
1186 | |
1187 | /* | |
1188 | * List of struct kvm_mmu_pages not being used as roots. |
1189 | * All struct kvm_mmu_pages in the list should have | |
c0e64238 | 1190 | * tdp_mmu_page set and a tdp_mmu_root_count of 0. |
c0dba6e4 | 1191 | */ |
89c0fd49 | 1192 | struct list_head tdp_mmu_pages; |
9a77daac BG |
1193 | |
1194 | /* | |
1195 | * Protects accesses to the following fields when the MMU lock | |
1196 | * is held in read mode: | |
c0e64238 | 1197 | * - tdp_mmu_roots (above) |
9a77daac BG |
1198 | * - tdp_mmu_pages (above) |
1199 | * - the link field of struct kvm_mmu_pages used by the TDP MMU | |
1200 | * - lpage_disallowed_mmu_pages | |
1201 | * - the lpage_disallowed_link field of struct kvm_mmu_pages used | |
1202 | * by the TDP MMU | |
1203 | * It is acceptable, but not necessary, to acquire this lock when | |
1204 | * the thread holds the MMU lock in write mode. | |
1205 | */ | |
1206 | spinlock_t tdp_mmu_pages_lock; | |
897218ff | 1207 | #endif /* CONFIG_X86_64 */ |
a2557408 BG |
1208 | |
1209 | /* | |
1210 | * If set, rmaps have been allocated for all memslots and should be | |
1211 | * allocated for any newly created or modified memslots. | |
1212 | */ | |
1213 | bool memslots_have_rmaps; | |
3c86c0d3 VP |
1214 | |
1215 | #if IS_ENABLED(CONFIG_HYPERV) | |
1216 | hpa_t hv_root_tdp; | |
1217 | spinlock_t hv_root_tdp_lock; | |
1218 | #endif | |
d69fb81f ZX |
1219 | }; |
1220 | ||
0711456c | 1221 | struct kvm_vm_stat { |
0193cc90 | 1222 | struct kvm_vm_stat_generic generic; |
e3cb6fa0 PB |
1223 | u64 mmu_shadow_zapped; |
1224 | u64 mmu_pte_write; | |
1225 | u64 mmu_pde_zapped; | |
1226 | u64 mmu_flooded; | |
1227 | u64 mmu_recycled; | |
1228 | u64 mmu_cache_miss; | |
1229 | u64 mmu_unsync; | |
71f51d2c MZ |
1230 | union { |
1231 | struct { | |
1232 | atomic64_t pages_4k; | |
1233 | atomic64_t pages_2m; | |
1234 | atomic64_t pages_1g; | |
1235 | }; | |
1236 | atomic64_t pages[KVM_NR_PAGE_SIZES]; | |
1237 | }; | |
e3cb6fa0 PB |
1238 | u64 nx_lpage_splits; |
1239 | u64 max_mmu_page_hash_collisions; | |
ec1cf69c | 1240 | u64 max_mmu_rmap_size; |
0711456c ZX |
1241 | }; |
1242 | ||
77b4c255 | 1243 | struct kvm_vcpu_stat { |
0193cc90 | 1244 | struct kvm_vcpu_stat_generic generic; |
8a7e75d4 SJS |
1245 | u64 pf_fixed; |
1246 | u64 pf_guest; | |
1247 | u64 tlb_flush; | |
1248 | u64 invlpg; | |
1249 | ||
1250 | u64 exits; | |
1251 | u64 io_exits; | |
1252 | u64 mmio_exits; | |
1253 | u64 signal_exits; | |
1254 | u64 irq_window_exits; | |
1255 | u64 nmi_window_exits; | |
c595ceee | 1256 | u64 l1d_flush; |
8a7e75d4 | 1257 | u64 halt_exits; |
8a7e75d4 SJS |
1258 | u64 request_irq_exits; |
1259 | u64 irq_exits; | |
1260 | u64 host_state_reload; | |
8a7e75d4 SJS |
1261 | u64 fpu_reload; |
1262 | u64 insn_emulation; | |
1263 | u64 insn_emulation_fail; | |
1264 | u64 hypercalls; | |
1265 | u64 irq_injections; | |
1266 | u64 nmi_injections; | |
0f1e261e | 1267 | u64 req_event; |
43c11d91 | 1268 | u64 nested_run; |
4a7132ef WL |
1269 | u64 directed_yield_attempted; |
1270 | u64 directed_yield_successful; | |
d5a0483f | 1271 | u64 guest_mode; |
77b4c255 | 1272 | }; |
ad312c7c | 1273 | |
8a76d7f2 JR |
1274 | struct x86_instruction_info; |
1275 | ||
8fe8ab46 WA |
1276 | struct msr_data { |
1277 | bool host_initiated; | |
1278 | u32 index; | |
1279 | u64 data; | |
1280 | }; | |
1281 | ||
cb5281a5 PB |
1282 | struct kvm_lapic_irq { |
1283 | u32 vector; | |
b7cb2231 PB |
1284 | u16 delivery_mode; |
1285 | u16 dest_mode; | |
1286 | bool level; | |
1287 | u16 trig_mode; | |
cb5281a5 PB |
1288 | u32 shorthand; |
1289 | u32 dest_id; | |
93bbf0b8 | 1290 | bool msi_redir_hint; |
cb5281a5 PB |
1291 | }; |
1292 | ||
c96001c5 PX |
1293 | static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) |
1294 | { | |
1295 | return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL; | |
1296 | } | |
1297 | ||
ea4a5ff8 | 1298 | struct kvm_x86_ops { |
13a34e06 RK |
1299 | int (*hardware_enable)(void); |
1300 | void (*hardware_disable)(void); | |
6e4fd06f | 1301 | void (*hardware_unsetup)(void); |
774ead3a | 1302 | bool (*cpu_has_accelerated_tpr)(void); |
5719455f | 1303 | bool (*has_emulated_msr)(struct kvm *kvm, u32 index); |
7c1b761b | 1304 | void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu); |
ea4a5ff8 | 1305 | |
562b6b08 | 1306 | unsigned int vm_size; |
03543133 SS |
1307 | int (*vm_init)(struct kvm *kvm); |
1308 | void (*vm_destroy)(struct kvm *kvm); | |
1309 | ||
ea4a5ff8 | 1310 | /* Create, but do not attach this VCPU */ |
987b2594 | 1311 | int (*vcpu_create)(struct kvm_vcpu *vcpu); |
ea4a5ff8 | 1312 | void (*vcpu_free)(struct kvm_vcpu *vcpu); |
d28bc9dd | 1313 | void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event); |
ea4a5ff8 ZX |
1314 | |
1315 | void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); | |
1316 | void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); | |
1317 | void (*vcpu_put)(struct kvm_vcpu *vcpu); | |
ea4a5ff8 | 1318 | |
6986982f | 1319 | void (*update_exception_bitmap)(struct kvm_vcpu *vcpu); |
609e36d3 | 1320 | int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); |
8fe8ab46 | 1321 | int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); |
ea4a5ff8 ZX |
1322 | u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); |
1323 | void (*get_segment)(struct kvm_vcpu *vcpu, | |
1324 | struct kvm_segment *var, int seg); | |
2e4d2653 | 1325 | int (*get_cpl)(struct kvm_vcpu *vcpu); |
ea4a5ff8 ZX |
1326 | void (*set_segment)(struct kvm_vcpu *vcpu, |
1327 | struct kvm_segment *var, int seg); | |
1328 | void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); | |
ea4a5ff8 | 1329 | void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); |
c2fe3cd4 SC |
1330 | bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0); |
1331 | void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); | |
72f211ec | 1332 | int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); |
89a27f4d GN |
1333 | void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
1334 | void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); | |
1335 | void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); | |
1336 | void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); | |
c77fb5fe | 1337 | void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu); |
020df079 | 1338 | void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); |
5fdbf976 | 1339 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); |
ea4a5ff8 ZX |
1340 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); |
1341 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); | |
1342 | ||
7780938c | 1343 | void (*tlb_flush_all)(struct kvm_vcpu *vcpu); |
eeeb4f67 | 1344 | void (*tlb_flush_current)(struct kvm_vcpu *vcpu); |
b08660e5 | 1345 | int (*tlb_remote_flush)(struct kvm *kvm); |
a49b9635 LT |
1346 | int (*tlb_remote_flush_with_range)(struct kvm *kvm, |
1347 | struct kvm_tlb_range *range); | |
ea4a5ff8 | 1348 | |
faff8758 JS |
1349 | /* |
1350 | * Flush any TLB entries associated with the given GVA. | |
1351 | * Does not need to flush GPA->HPA mappings. | |
1352 | * Can potentially get non-canonical addresses through INVLPGs, which | |
1353 | * the implementation may choose to ignore if appropriate. | |
1354 | */ | |
1355 | void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr); | |
ea4a5ff8 | 1356 | |
e64419d9 SC |
1357 | /* |
1358 | * Flush any TLB entries created by the guest. Like tlb_flush_gva(), | |
1359 | * does not need to flush GPA->HPA mappings. | |
1360 | */ | |
1361 | void (*tlb_flush_guest)(struct kvm_vcpu *vcpu); | |
1362 | ||
a9ab13ff | 1363 | enum exit_fastpath_completion (*run)(struct kvm_vcpu *vcpu); |
1e9e2622 WL |
1364 | int (*handle_exit)(struct kvm_vcpu *vcpu, |
1365 | enum exit_fastpath_completion exit_fastpath); | |
f8ea7c60 | 1366 | int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); |
5ef8acbd | 1367 | void (*update_emulated_instruction)(struct kvm_vcpu *vcpu); |
2809f5d2 | 1368 | void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); |
37ccdcbe | 1369 | u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu); |
ea4a5ff8 ZX |
1370 | void (*patch_hypercall)(struct kvm_vcpu *vcpu, |
1371 | unsigned char *hypercall_addr); | |
66fd3f7f | 1372 | void (*set_irq)(struct kvm_vcpu *vcpu); |
95ba8273 | 1373 | void (*set_nmi)(struct kvm_vcpu *vcpu); |
cfcd20e5 | 1374 | void (*queue_exception)(struct kvm_vcpu *vcpu); |
b463a6f7 | 1375 | void (*cancel_injection)(struct kvm_vcpu *vcpu); |
c9d40913 PB |
1376 | int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection); |
1377 | int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); | |
3cfc3092 JK |
1378 | bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); |
1379 | void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); | |
c9a7953f JK |
1380 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); |
1381 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); | |
95ba8273 | 1382 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); |
ef8efd7a | 1383 | bool (*check_apicv_inhibit_reasons)(ulong bit); |
d62caabb | 1384 | void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); |
c7c9c56c | 1385 | void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); |
67c9dddc | 1386 | void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); |
e6c67d8c | 1387 | bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu); |
6308630b | 1388 | void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); |
8d860bbe | 1389 | void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); |
a4148b7c | 1390 | void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu); |
91a5f413 | 1391 | int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); |
76dfafd5 | 1392 | int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); |
ea4a5ff8 | 1393 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); |
2ac52ab8 | 1394 | int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr); |
4b12f0de | 1395 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); |
344f414f | 1396 | |
e83bc09c SC |
1397 | void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa, |
1398 | int root_level); | |
727a7e27 | 1399 | |
f5f48ee1 SY |
1400 | bool (*has_wbinvd_exit)(void); |
1401 | ||
307a94c7 IS |
1402 | u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu); |
1403 | u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu); | |
edcfe540 | 1404 | void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); |
1ab9287a | 1405 | void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier); |
99e3e30a | 1406 | |
235ba74f SC |
1407 | /* |
1408 | * Retrieve somewhat arbitrary exit information. Intended to be used | |
1409 | * only from within tracepoints to avoid VMREADs when tracing is off. | |
1410 | */ | |
1411 | void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2, | |
1412 | u32 *exit_int_info, u32 *exit_int_info_err_code); | |
8a76d7f2 JR |
1413 | |
1414 | int (*check_intercept)(struct kvm_vcpu *vcpu, | |
1415 | struct x86_instruction_info *info, | |
21f1b8f2 SC |
1416 | enum x86_intercept_stage stage, |
1417 | struct x86_exception *exception); | |
a9ab13ff | 1418 | void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu); |
7f5581f5 | 1419 | |
d264ee0c | 1420 | void (*request_immediate_exit)(struct kvm_vcpu *vcpu); |
ae97a3b8 RK |
1421 | |
1422 | void (*sched_in)(struct kvm_vcpu *kvm, int cpu); | |
88178fd4 KH |
1423 | |
1424 | /* | |
a018eba5 SC |
1425 | * Size of the CPU's dirty log buffer, i.e. VMX's PML buffer. A zero |
1426 | * value indicates CPU dirty logging is unsupported or disabled. | |
88178fd4 | 1427 | */ |
6dd03800 | 1428 | int cpu_dirty_log_size; |
a85863c2 | 1429 | void (*update_cpu_dirty_logging)(struct kvm_vcpu *vcpu); |
bab4165e | 1430 | |
25462f7f WH |
1431 | /* pmu operations of sub-arch */ |
1432 | const struct kvm_pmu_ops *pmu_ops; | |
33b22172 | 1433 | const struct kvm_x86_nested_ops *nested_ops; |
efc64404 | 1434 | |
bf9f6ac8 FW |
1435 | /* |
1436 | * Architecture specific hooks for vCPU blocking due to | |
1437 | * HLT instruction. | |
1438 | * Returns for .pre_block(): | |
1439 | * - 0 means continue to block the vCPU. | |
1440 | * - 1 means we cannot block the vCPU since some event | |
1441 | * happens during this period, such as, 'ON' bit in | |
1442 | * posted-interrupts descriptor is set. | |
1443 | */ | |
1444 | int (*pre_block)(struct kvm_vcpu *vcpu); | |
1445 | void (*post_block)(struct kvm_vcpu *vcpu); | |
d1ed092f SS |
1446 | |
1447 | void (*vcpu_blocking)(struct kvm_vcpu *vcpu); | |
1448 | void (*vcpu_unblocking)(struct kvm_vcpu *vcpu); | |
1449 | ||
efc64404 FW |
1450 | int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, |
1451 | uint32_t guest_irq, bool set); | |
57ab8794 | 1452 | void (*start_assignment)(struct kvm *kvm); |
be8ca170 | 1453 | void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); |
17e433b5 | 1454 | bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); |
ce7a058a | 1455 | |
f9927982 SC |
1456 | int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, |
1457 | bool *expired); | |
ce7a058a | 1458 | void (*cancel_hv_timer)(struct kvm_vcpu *vcpu); |
c45dcc71 AR |
1459 | |
1460 | void (*setup_mce)(struct kvm_vcpu *vcpu); | |
0234bf88 | 1461 | |
c9d40913 | 1462 | int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); |
ecc513e5 SC |
1463 | int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate); |
1464 | int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate); | |
c9d40913 | 1465 | void (*enable_smi_window)(struct kvm_vcpu *vcpu); |
5acc5c06 BS |
1466 | |
1467 | int (*mem_enc_op)(struct kvm *kvm, void __user *argp); | |
69eaedee BS |
1468 | int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp); |
1469 | int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp); | |
54526d1f | 1470 | int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd); |
801e459a TL |
1471 | |
1472 | int (*get_msr_feature)(struct kvm_msr_entry *entry); | |
57b119da | 1473 | |
09e3e2a1 | 1474 | bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, void *insn, int insn_len); |
4b9852f4 LA |
1475 | |
1476 | bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); | |
344c6c80 | 1477 | int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); |
93dff2fe JM |
1478 | |
1479 | void (*migrate_timers)(struct kvm_vcpu *vcpu); | |
51de8151 | 1480 | void (*msr_filter_changed)(struct kvm_vcpu *vcpu); |
f9a4d621 | 1481 | int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err); |
647daca2 TL |
1482 | |
1483 | void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector); | |
ea4a5ff8 ZX |
1484 | }; |
1485 | ||
33b22172 PB |
1486 | struct kvm_x86_nested_ops { |
1487 | int (*check_events)(struct kvm_vcpu *vcpu); | |
d2060bd4 | 1488 | bool (*hv_timer_pending)(struct kvm_vcpu *vcpu); |
cb6a32c2 | 1489 | void (*triple_fault)(struct kvm_vcpu *vcpu); |
33b22172 PB |
1490 | int (*get_state)(struct kvm_vcpu *vcpu, |
1491 | struct kvm_nested_state __user *user_kvm_nested_state, | |
1492 | unsigned user_data_size); | |
1493 | int (*set_state)(struct kvm_vcpu *vcpu, | |
1494 | struct kvm_nested_state __user *user_kvm_nested_state, | |
1495 | struct kvm_nested_state *kvm_state); | |
729c15c2 | 1496 | bool (*get_nested_state_pages)(struct kvm_vcpu *vcpu); |
02f5fb2e | 1497 | int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa); |
33b22172 PB |
1498 | |
1499 | int (*enable_evmcs)(struct kvm_vcpu *vcpu, | |
1500 | uint16_t *vmcs_version); | |
1501 | uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu); | |
ea4a5ff8 ZX |
1502 | }; |
1503 | ||
d008dfdb SC |
/*
 * Vendor-module initialisation hooks: four argument-less callbacks that each
 * return an int, plus a pointer to the kvm_x86_ops table to use at runtime.
 * NOTE(review): the meaning of the int return values (0/1 vs. -errno) is not
 * visible here; check the callers before relying on it.
 */
1504 | struct kvm_x86_init_ops { | |
1505 | int (*cpu_has_kvm_support)(void); | |
1506 | int (*disabled_by_bios)(void); | |
1507 | int (*check_processor_compatibility)(void); | |
1508 | int (*hardware_setup)(void); | |
1509 | ||
1510 | struct kvm_x86_ops *runtime_ops; | |
1511 | }; | |
1512 | ||
/*
 * x86-specific data attached to an async page fault (going by the name):
 * a 32-bit token, a guest frame number, a CR3 value and a direct_map flag.
 * NOTE(review): how each field is produced and consumed is defined by code
 * outside this header; confirm there before changing the layout.
 */
af585b92 | 1513 | struct kvm_arch_async_pf { |
7c90705b | 1514 | u32 token; |
af585b92 | 1515 | gfn_t gfn; |
fb67e14f | 1516 | unsigned long cr3; |
c4806acd | 1517 | bool direct_map; |
af585b92 GN |
1518 | }; |
1519 | ||
9cc39a5a | 1520 | extern u32 __read_mostly kvm_nr_uret_msrs; |
91661989 | 1521 | extern u64 __read_mostly host_efer; |
3edd6839 | 1522 | extern bool __read_mostly allow_smaller_maxphyaddr; |
fdf513e3 | 1523 | extern bool __read_mostly enable_apicv; |
afaf0b2f | 1524 | extern struct kvm_x86_ops kvm_x86_ops; |
97896d04 | 1525 | |
9af5471b JB |
/*
 * First expansion of the op list: with KVM_X86_OP(func) defined as below,
 * every KVM_X86_OP(func) entry becomes a DECLARE_STATIC_CALL() for
 * kvm_x86_<func>, typed after the matching member of struct kvm_x86_ops.
 * KVM_X86_OP_NULL is an alias, so both kinds of entry are declared alike.
 * NOTE(review): this relies on <asm/kvm-x86-ops.h> listing the ops through
 * these two macros and presumably #undef-ing them afterwards so they can be
 * redefined further down; confirm against that header.
 */
1526 | #define KVM_X86_OP(func) \ | |
1527 | DECLARE_STATIC_CALL(kvm_x86_##func, *(((struct kvm_x86_ops *)0)->func)); | |
1528 | #define KVM_X86_OP_NULL KVM_X86_OP | |
1529 | #include <asm/kvm-x86-ops.h> | |
1530 | ||
/*
 * Second expansion of the same list, this time inside a function body: each
 * entry becomes static_call_update(kvm_x86_<func>, kvm_x86_ops.<func>), i.e.
 * every static call is pointed at the current member of kvm_x86_ops.
 */
1531 | static inline void kvm_ops_static_call_update(void) | |
1532 | { | |
1533 | #define KVM_X86_OP(func) \ | |
1534 | static_call_update(kvm_x86_##func, kvm_x86_ops.func); | |
1535 | #define KVM_X86_OP_NULL KVM_X86_OP | |
1536 | #include <asm/kvm-x86-ops.h> | |
1537 | } | |
1538 | ||
434a1e94 SC |
1539 | #define __KVM_HAVE_ARCH_VM_ALLOC |
1540 | static inline struct kvm *kvm_arch_alloc_vm(void) | |
1541 | { | |
88dca4ca | 1542 | return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); |
434a1e94 | 1543 | } |
562b6b08 | 1544 | void kvm_arch_free_vm(struct kvm *kvm); |
434a1e94 | 1545 | |
b08660e5 TL |
1546 | #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB |
1547 | static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) | |
1548 | { | |
afaf0b2f | 1549 | if (kvm_x86_ops.tlb_remote_flush && |
b3646477 | 1550 | !static_call(kvm_x86_tlb_remote_flush)(kvm)) |
b08660e5 TL |
1551 | return 0; |
1552 | else | |
1553 | return -ENOTSUPP; | |
1554 | } | |
1555 | ||
54f1585a ZX |
1556 | int kvm_mmu_module_init(void); |
1557 | void kvm_mmu_module_exit(void); | |
1558 | ||
1559 | void kvm_mmu_destroy(struct kvm_vcpu *vcpu); | |
1560 | int kvm_mmu_create(struct kvm_vcpu *vcpu); | |
13d268ca XG |
1561 | void kvm_mmu_init_vm(struct kvm *kvm); |
1562 | void kvm_mmu_uninit_vm(struct kvm *kvm); | |
54f1585a | 1563 | |
49c6f875 | 1564 | void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu); |
8a3c1a33 | 1565 | void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); |
1c91cad4 | 1566 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, |
269e9552 | 1567 | const struct kvm_memory_slot *memslot, |
3c9bd400 | 1568 | int start_level); |
3ea3b7fa | 1569 | void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, |
f36f3f28 | 1570 | const struct kvm_memory_slot *memslot); |
f4b4b180 | 1571 | void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, |
269e9552 | 1572 | const struct kvm_memory_slot *memslot); |
54f1585a | 1573 | void kvm_mmu_zap_all(struct kvm *kvm); |
15248258 | 1574 | void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen); |
bc8a3d89 BG |
1575 | unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm); |
1576 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages); | |
54f1585a | 1577 | |
ff03a073 | 1578 | int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3); |
cc4b6871 | 1579 | |
3200f405 | 1580 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, |
9f811285 | 1581 | const void *val, int bytes); |
2f333bcb | 1582 | |
6ef768fa PB |
/*
 * One registered IRQ mask notifier: the callback (which receives the
 * notifier itself and a "masked" flag), the irq number it is attached to,
 * and the hlist node used to link it into a list.  Instances are passed to
 * kvm_register_irq_mask_notifier() / kvm_unregister_irq_mask_notifier().
 */
1583 | struct kvm_irq_mask_notifier { | |
1584 | void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); | |
1585 | int irq; | |
1586 | struct hlist_node link; | |
1587 | }; | |
1588 | ||
1589 | void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, | |
1590 | struct kvm_irq_mask_notifier *kimn); | |
1591 | void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, | |
1592 | struct kvm_irq_mask_notifier *kimn); | |
1593 | void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, | |
1594 | bool mask); | |
1595 | ||
2f333bcb | 1596 | extern bool tdp_enabled; |
9f811285 | 1597 | |
a3e06bbe LJ |
1598 | u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); |
1599 | ||
92a1f12d JR |
1600 | /* control of guest tsc rate supported? */ |
1601 | extern bool kvm_has_tsc_control; | |
92a1f12d JR |
1602 | /* maximum supported tsc_khz for guests */ |
1603 | extern u32 kvm_max_guest_tsc_khz; | |
bc9b961b HZ |
1604 | /* number of bits of the fractional part of the TSC scaling ratio */ |
1605 | extern u8 kvm_tsc_scaling_ratio_frac_bits; | |
1606 | /* maximum allowed value of TSC scaling ratio */ | |
1607 | extern u64 kvm_max_tsc_scaling_ratio; | |
64672c95 YJ |
1608 | /* 1ull << kvm_tsc_scaling_ratio_frac_bits */ |
1609 | extern u64 kvm_default_tsc_scaling_ratio; | |
fe6b6bc8 CQ |
1610 | /* bus lock detection supported? */ |
1611 | extern bool kvm_has_bus_lock_exit; | |
92a1f12d | 1612 | |
c45dcc71 | 1613 | extern u64 kvm_mce_cap_supported; |
92a1f12d | 1614 | |
41577ab8 SC |
1615 | /* |
1616 | * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing | |
1617 | * userspace I/O) to indicate that the emulation context | |
d9f6e12f | 1618 | * should be reused as is, i.e. skip initialization of |
41577ab8 SC |
1619 | * emulation context, instruction fetch and decode. |
1620 | * | |
1621 | * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware. | |
1622 | * Indicates that only select instructions (tagged with | |
1623 | * EmulateOnUD) should be emulated (to minimize the emulator | |
1624 | * attack surface). See also EMULTYPE_TRAP_UD_FORCED. | |
1625 | * | |
1626 | * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to | |
1627 | * decode the instruction length. For use *only* by | |
afaf0b2f | 1628 | * kvm_x86_ops.skip_emulated_instruction() implementations. |
41577ab8 | 1629 | * |
92daa48b SC |
1630 | * EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to |
1631 | * retry native execution under certain conditions. | |
1632 | * Can only be set in conjunction with EMULTYPE_PF. | |
41577ab8 SC |
1633 | * |
1634 | * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was | |
1635 | * triggered by KVM's magic "force emulation" prefix, | |
1636 | * which is opt in via module param (off by default). | |
1637 | * Bypasses EmulateOnUD restriction despite emulating | |
1638 | * due to an intercepted #UD (see EMULTYPE_TRAP_UD). | |
1639 | * Used to test the full emulator from userspace. | |
1640 | * | |
1641 | * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware | |
1642 | * backdoor emulation, which is opt in via module param. | |
d9f6e12f | 1643 | * VMware backdoor emulation handles select instructions |
41577ab8 | 1644 | * and reinjects the #GP for all other cases. |
92daa48b SC |
1645 | * |
1646 | * EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which | |
1647 | * case the CR2/GPA value passed on the stack is valid. | |
41577ab8 | 1648 | */ |
571008da SY |
1649 | #define EMULTYPE_NO_DECODE (1 << 0) |
1650 | #define EMULTYPE_TRAP_UD (1 << 1) | |
ba8afb6b | 1651 | #define EMULTYPE_SKIP (1 << 2) |
92daa48b | 1652 | #define EMULTYPE_ALLOW_RETRY_PF (1 << 3) |
b4000606 | 1653 | #define EMULTYPE_TRAP_UD_FORCED (1 << 4) |
42cbf068 | 1654 | #define EMULTYPE_VMWARE_GP (1 << 5) |
92daa48b SC |
1655 | #define EMULTYPE_PF (1 << 6) |
1656 | ||
c60658d1 SC |
1657 | int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); |
1658 | int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, | |
1659 | void *insn, int insn_len); | |
35be0ade | 1660 | |
f2b4b7dd | 1661 | void kvm_enable_efer_bits(u64); |
384bb783 | 1662 | bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); |
edef5c36 | 1663 | int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated); |
f20935d8 SC |
1664 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data); |
1665 | int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data); | |
1edce0a9 SC |
1666 | int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu); |
1667 | int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu); | |
5ff3a351 SC |
1668 | int kvm_emulate_as_nop(struct kvm_vcpu *vcpu); |
1669 | int kvm_emulate_invd(struct kvm_vcpu *vcpu); | |
1670 | int kvm_emulate_mwait(struct kvm_vcpu *vcpu); | |
1671 | int kvm_handle_invalid_op(struct kvm_vcpu *vcpu); | |
1672 | int kvm_emulate_monitor(struct kvm_vcpu *vcpu); | |
54f1585a | 1673 | |
dca7f128 | 1674 | int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in); |
6a908b62 | 1675 | int kvm_emulate_cpuid(struct kvm_vcpu *vcpu); |
54f1585a | 1676 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); |
5cb56059 | 1677 | int kvm_vcpu_halt(struct kvm_vcpu *vcpu); |
647daca2 | 1678 | int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu); |
f5f48ee1 | 1679 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); |
54f1585a | 1680 | |
3e6e0aab | 1681 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |
c697518a | 1682 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); |
2b4a273b | 1683 | void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); |
3e6e0aab | 1684 | |
7f3d35fd KW |
1685 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, |
1686 | int reason, bool has_error_code, u32 error_code); | |
37817f29 | 1687 | |
ed02b213 TL |
1688 | void kvm_free_guest_fpu(struct kvm_vcpu *vcpu); |
1689 | ||
f27ad38a | 1690 | void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0); |
5b51cb13 | 1691 | void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4); |
49a9b07e | 1692 | int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); |
2390218b | 1693 | int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); |
a83b29c6 | 1694 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); |
eea1cff9 | 1695 | int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); |
020df079 | 1696 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); |
29d6ca41 | 1697 | void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); |
2d3ad1f4 AK |
1698 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); |
1699 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); | |
54f1585a | 1700 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); |
92f9895c | 1701 | int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu); |
54f1585a | 1702 | |
609e36d3 | 1703 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr); |
8fe8ab46 | 1704 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr); |
54f1585a | 1705 | |
91586a3b JK |
1706 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); |
1707 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); | |
c483c454 | 1708 | int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu); |
91586a3b | 1709 | |
298101da AK |
1710 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); |
1711 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | |
4d5523cf | 1712 | void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload); |
ce7ddec4 JR |
1713 | void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); |
1714 | void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | |
6389ee94 | 1715 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); |
53b3d8e9 SC |
1716 | bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, |
1717 | struct x86_exception *fault); | |
ec92fe44 JR |
1718 | int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, |
1719 | gfn_t gfn, void *data, int offset, int len, | |
1720 | u32 access); | |
0a79b009 | 1721 | bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); |
16f8a6f9 | 1722 | bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr); |
298101da | 1723 | |
1a577b72 MT |
/*
 * Record the level driven by one interrupt source and report the resulting
 * line state.
 *
 * @irq_state:     bitmap with one bit per source; updated in place
 * @irq_source_id: index of the bit belonging to the calling source
 * @level:         non-zero sets the source's bit, zero clears it
 *
 * Returns 1 if any bit of *irq_state is set afterwards, 0 otherwise, so the
 * line behaves as the logical OR of all sources (level-triggered).
 */
static inline int __kvm_irq_line_state(unsigned long *irq_state,
				       int irq_source_id, int level)
{
	if (!level)
		__clear_bit(irq_source_id, irq_state);
	else
		__set_bit(irq_source_id, irq_state);

	return *irq_state != 0;
}
1735 | ||
b94742c9 JS |
/*
 * Bit selectors for MMU roots: bit 0 stands for the current root, bit 1 + i
 * for the i-th previous root, and KVM_MMU_ROOTS_ALL has every bit set.
 * NOTE(review): presumably these are OR-ed together and passed as the
 * roots_to_free argument of kvm_mmu_free_roots(); confirm at the call sites.
 *
 * The argument of KVM_MMU_ROOT_PREVIOUS() is parenthesized so that compound
 * expressions (e.g. "a | b" or "c ? x : y") are added to 1 as a whole rather
 * than being re-associated by operator precedence.
 */
#define KVM_MMU_ROOT_CURRENT		BIT(0)
#define KVM_MMU_ROOT_PREVIOUS(i)	BIT(1 + (i))
#define KVM_MMU_ROOTS_ALL		(~0UL)
08fb59d8 | 1739 | |
1a577b72 MT |
1740 | int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level); |
1741 | void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); | |
3de42dc0 | 1742 | |
3419ffc8 SY |
1743 | void kvm_inject_nmi(struct kvm_vcpu *vcpu); |
1744 | ||
7c86663b PB |
1745 | void kvm_update_dr7(struct kvm_vcpu *vcpu); |
1746 | ||
1cb3f3ae | 1747 | int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); |
54f1585a | 1748 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); |
6a82cd1c VK |
1749 | void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, |
1750 | ulong roots_to_free); | |
25b62c62 | 1751 | void kvm_mmu_free_guest_mode_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu); |
54987b7a PB |
1752 | gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, |
1753 | struct x86_exception *exception); | |
ab9ae313 AK |
1754 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, |
1755 | struct x86_exception *exception); | |
1756 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, | |
1757 | struct x86_exception *exception); | |
1758 | gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, | |
1759 | struct x86_exception *exception); | |
1760 | gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, | |
1761 | struct x86_exception *exception); | |
54f1585a | 1762 | |
4e19c36f | 1763 | bool kvm_apicv_activated(struct kvm *kvm); |
8df14af4 SS |
1764 | void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu); |
1765 | void kvm_request_apicv_update(struct kvm *kvm, bool activate, | |
1766 | unsigned long bit); | |
d62caabb | 1767 | |
b0a1637f ML |
1768 | void __kvm_request_apicv_update(struct kvm *kvm, bool activate, |
1769 | unsigned long bit); | |
1770 | ||
54f1585a ZX |
1771 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); |
1772 | ||
736c291c | 1773 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, |
dc25e89e | 1774 | void *insn, int insn_len); |
a7052897 | 1775 | void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); |
5efac074 PB |
1776 | void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, |
1777 | gva_t gva, hpa_t root_hpa); | |
eb4b248e | 1778 | void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); |
b5129100 | 1779 | void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd); |
34c16eec | 1780 | |
746700d2 WH |
1781 | void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, |
1782 | int tdp_max_root_level, int tdp_huge_page_level); | |
18552672 | 1783 | |
d6e88aec | 1784 | static inline u16 kvm_read_ldt(void) |
ec6d273d ZX |
1785 | { |
1786 | u16 ldt; | |
1787 | asm("sldt %0" : "=g"(ldt)); | |
1788 | return ldt; | |
1789 | } | |
1790 | ||
d6e88aec | 1791 | static inline void kvm_load_ldt(u16 sel) |
ec6d273d ZX |
1792 | { |
1793 | asm("lldt %0" : : "rm"(sel)); | |
1794 | } | |
ec6d273d | 1795 | |
ec6d273d ZX |
#ifdef CONFIG_X86_64
/*
 * Read the MSR numbered @msr via rdmsrl() and return its 64-bit value as an
 * unsigned long.  Only built when CONFIG_X86_64 is defined.
 */
static inline unsigned long read_msr(unsigned long msr)
{
	u64 msr_val;

	rdmsrl(msr, msr_val);

	return msr_val;
}
#endif
1805 | ||
c1a5d4f9 AK |
/*
 * Queue a general-protection exception on @vcpu: forwards to
 * kvm_queue_exception_e() with GP_VECTOR and the caller's @error_code.
 */
1806 | static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) | |
1807 | { | |
1808 | kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); | |
1809 | } | |
1810 | ||
ec6d273d ZX |
1811 | #define TSS_IOPB_BASE_OFFSET 0x66 |
1812 | #define TSS_BASE_SIZE 0x68 | |
1813 | #define TSS_IOPB_SIZE (65536 / 8) | |
1814 | #define TSS_REDIRECTION_SIZE (256 / 8) | |
7d76b4d3 JP |
1815 | #define RMODE_TSS_SIZE \ |
1816 | (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1) | |
53e0aa7b | 1817 | |
37817f29 IE |
1818 | enum { |
1819 | TASK_SWITCH_CALL = 0, | |
1820 | TASK_SWITCH_IRET = 1, | |
1821 | TASK_SWITCH_JMP = 2, | |
1822 | TASK_SWITCH_GATE = 3, | |
1823 | }; | |
1824 | ||
1371d904 | 1825 | #define HF_GIF_MASK (1 << 0) |
95ba8273 | 1826 | #define HF_NMI_MASK (1 << 3) |
44c11430 | 1827 | #define HF_IRET_MASK (1 << 4) |
ec9e60b2 | 1828 | #define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ |
f077825a PB |
1829 | #define HF_SMM_MASK (1 << 6) |
1830 | #define HF_SMM_INSIDE_NMI_MASK (1 << 7) | |
1371d904 | 1831 | |
699023e2 PB |
1832 | #define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE |
1833 | #define KVM_ADDRESS_SPACE_NUM 2 | |
1834 | ||
1835 | #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0) | |
1836 | #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) | |
1371d904 | 1837 | |
e930bffe | 1838 | #define KVM_ARCH_WANT_MMU_NOTIFIER |
5f7c292b | 1839 | |
c7c9c56c | 1840 | int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); |
a1b37100 | 1841 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); |
71cc849b | 1842 | int kvm_cpu_has_extint(struct kvm_vcpu *v); |
a1b37100 | 1843 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); |
0b71785d | 1844 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); |
d28bc9dd | 1845 | void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); |
4256f43f | 1846 | void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); |
e930bffe | 1847 | |
4180bf1b | 1848 | int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, |
bdf7ffc8 | 1849 | unsigned long ipi_bitmap_high, u32 min, |
4180bf1b WL |
1850 | unsigned long icr, int op_64_bit); |
1851 | ||
e5fda4bb | 1852 | int kvm_add_user_return_msr(u32 msr); |
8ea8b8d6 | 1853 | int kvm_find_user_return_msr(u32 msr); |
7e34fbd0 | 1854 | int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); |
18863bdd | 1855 | |
61a05d44 SC |
1856 | static inline bool kvm_is_supported_user_return_msr(u32 msr) |
1857 | { | |
1858 | return kvm_find_user_return_msr(msr) >= 0; | |
1859 | } | |
1860 | ||
fe3eb504 | 1861 | u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc, u64 ratio); |
4ba76538 | 1862 | u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc); |
83150f29 IS |
1863 | u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier); |
1864 | u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier); | |
35181e86 | 1865 | |
82b32774 | 1866 | unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); |
f92653ee JK |
1867 | bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); |
1868 | ||
2860c4b1 PB |
1869 | void kvm_make_mclock_inprogress_request(struct kvm *kvm); |
1870 | void kvm_make_scan_ioapic_request(struct kvm *kvm); | |
7ee30bc1 NNL |
1871 | void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, |
1872 | unsigned long *vcpu_bitmap); | |
2860c4b1 | 1873 | |
2a18b7e7 | 1874 | bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, |
af585b92 GN |
1875 | struct kvm_async_pf *work); |
1876 | void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, | |
1877 | struct kvm_async_pf *work); | |
56028d08 GN |
1878 | void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, |
1879 | struct kvm_async_pf *work); | |
557a961a | 1880 | void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu); |
7c0ade6c | 1881 | bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu); |
af585b92 GN |
1882 | extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); |
1883 | ||
6affcbed KH |
1884 | int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu); |
1885 | int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); | |
d264ee0c | 1886 | void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu); |
db8fcefa | 1887 | |
f5132b01 GN |
1888 | int kvm_is_in_guest(void); |
1889 | ||
ff5a983c PX |
1890 | void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, |
1891 | u32 size); | |
d71ba788 PB |
1892 | bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); |
1893 | bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); | |
f5132b01 | 1894 | |
8feb4a04 FW |
1895 | bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, |
1896 | struct kvm_vcpu **dest_vcpu); | |
1897 | ||
37131313 | 1898 | void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, |
d84f1e07 | 1899 | struct kvm_lapic_irq *irq); |
197a4f4b | 1900 | |
fdcf7562 AG |
1901 | static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq) |
1902 | { | |
1903 | /* We can only post Fixed and LowPrio IRQs */ | |
637543a8 SS |
1904 | return (irq->delivery_mode == APIC_DM_FIXED || |
1905 | irq->delivery_mode == APIC_DM_LOWEST); | |
fdcf7562 AG |
1906 | } |
1907 | ||
d1ed092f SS |
/*
 * Arch hook around vCPU blocking: invokes the kvm_x86_vcpu_blocking static
 * call through static_call_cond().
 * NOTE(review): static_call_cond() presumably turns into a no-op when no
 * target is installed; confirm in the static_call documentation.
 */
1908 | static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) | |
1909 | { | |
b3646477 | 1910 | static_call_cond(kvm_x86_vcpu_blocking)(vcpu); |
d1ed092f SS |
1911 | } |
1912 | ||
/* Counterpart of the above: invokes the kvm_x86_vcpu_unblocking static call. */
1913 | static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) | |
1914 | { | |
b3646477 | 1915 | static_call_cond(kvm_x86_vcpu_unblocking)(vcpu); |
d1ed092f SS |
1916 | } |
1917 | ||
/* Deliberately empty: x86 does nothing in this hook. */
3491caf2 | 1918 | static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} |
3217f7c2 | 1919 | |
7d669f50 SS |
1920 | static inline int kvm_cpu_get_apicid(int mps_cpu) |
1921 | { | |
1922 | #ifdef CONFIG_X86_LOCAL_APIC | |
64063505 | 1923 | return default_cpu_present_to_apicid(mps_cpu); |
7d669f50 SS |
1924 | #else |
1925 | WARN_ON_ONCE(1); | |
1926 | return BAD_APICID; | |
1927 | #endif | |
1928 | } | |
1929 | ||
05cade71 LP |
/*
 * Store @val, as an object of @type, at byte position (@offset - 0x7e00)
 * inside @buf.  Offsets are therefore expressed relative to a base of
 * 0x7e00, the same bias GET_SMSTATE() uses for reads.
 * NOTE(review): presumably @buf is the SMM state-save buffer and @offset an
 * architectural SMRAM offset; confirm at the call sites.
 *
 * Both @val and the whole expansion are parenthesized so the macro behaves
 * as a single assignment expression wherever it is used, instead of letting
 * neighbouring operators bind into the stored value.
 */
#define put_smstate(type, buf, offset, val)			\
	(*(type *)((buf) + (offset) - 0x7e00) = (val))
1932 | ||
ed19321f SC |
1933 | #define GET_SMSTATE(type, buf, offset) \ |
1934 | (*(type *)((buf) + (offset) - 0x7e00)) | |
1935 | ||
fb04a1ed PX |
1936 | int kvm_cpu_dirty_log_size(void); |
1937 | ||
d501f747 BG |
1938 | int alloc_all_memslots_rmaps(struct kvm *kvm); |
1939 | ||
1965aae3 | 1940 | #endif /* _ASM_X86_KVM_HOST_H */ |