/*
 * QEMU KVM support
 *
 * Copyright (C) 2006-2008 Qumranet Technologies
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/utsname.h>

#include <linux/kvm.h>
#include <linux/kvm_para.h>

#include "qemu-common.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm_int.h"
#include "kvm_i386.h"
#include "cpu.h"
#include "exec/gdbstub.h"
#include "qemu/host-utils.h"
#include "qemu/config-file.h"
#include "hw/i386/pc.h"
#include "hw/i386/apic.h"
#include "hw/i386/apic_internal.h"
#include "hw/i386/apic-msidef.h"
#include "exec/ioport.h"
#include "standard-headers/asm-x86/hyperv.h"
#include "hw/pci/pci.h"
#include "migration/migration.h"
#include "exec/memattrs.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#define MSR_KVM_WALL_CLOCK  0x11
#define MSR_KVM_SYSTEM_TIME 0x12

#ifndef BUS_MCEERR_AR
#define BUS_MCEERR_AR 4
#endif
#ifndef BUS_MCEERR_AO
#define BUS_MCEERR_AO 5
#endif

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_INFO(SET_TSS_ADDR),
    KVM_CAP_INFO(EXT_CPUID),
    KVM_CAP_INFO(MP_STATE),
    KVM_CAP_LAST_INFO
};

static bool has_msr_star;
static bool has_msr_hsave_pa;
static bool has_msr_tsc_aux;
static bool has_msr_tsc_adjust;
static bool has_msr_tsc_deadline;
static bool has_msr_feature_control;
static bool has_msr_async_pf_en;
static bool has_msr_pv_eoi_en;
static bool has_msr_misc_enable;
static bool has_msr_smbase;
static bool has_msr_bndcfgs;
static bool has_msr_kvm_steal_time;
static int lm_capable_kernel;
static bool has_msr_hv_hypercall;
static bool has_msr_hv_vapic;
static bool has_msr_hv_tsc;
static bool has_msr_hv_crash;
static bool has_msr_hv_reset;
static bool has_msr_mtrr;
static bool has_msr_xss;

static bool has_msr_architectural_pmu;
static uint32_t num_architectural_pmu_counters;

bool kvm_has_smm(void)
{
    return kvm_check_extension(kvm_state, KVM_CAP_X86_SMM);
}

bool kvm_allows_irq0_override(void)
{
    return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing();
}

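/*
 * Ask KVM for up to 'max' supported CPUID entries.  Returns NULL when the
 * buffer was too small, so the caller can retry with a larger one, and
 * exits on any other error.
 */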
static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
{
    struct kvm_cpuid2 *cpuid;
    int r, size;

    size = sizeof(*cpuid) + max * sizeof(*cpuid->entries);
    cpuid = g_malloc0(size);
    cpuid->nent = max;
    r = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, cpuid);
    if (r == 0 && cpuid->nent >= max) {
        r = -E2BIG;
    }
    if (r < 0) {
        if (r == -E2BIG) {
            g_free(cpuid);
            return NULL;
        } else {
            fprintf(stderr, "KVM_GET_SUPPORTED_CPUID failed: %s\n",
                    strerror(-r));
            exit(1);
        }
    }
    return cpuid;
}

/* Run KVM_GET_SUPPORTED_CPUID ioctl(), allocating a buffer large enough
 * for all entries.
 */
static struct kvm_cpuid2 *get_supported_cpuid(KVMState *s)
{
    struct kvm_cpuid2 *cpuid;
    int max = 1;
    while ((cpuid = try_get_cpuid(s, max)) == NULL) {
        max *= 2;
    }
    return cpuid;
}

static const struct kvm_para_features {
    int cap;
    int feature;
} para_features[] = {
    { KVM_CAP_CLOCKSOURCE, KVM_FEATURE_CLOCKSOURCE },
    { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY },
    { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP },
    { KVM_CAP_ASYNC_PF, KVM_FEATURE_ASYNC_PF },
};

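/*
 * Derive KVM paravirt feature bits from individual capability checks.
 * Used as a fallback on older kernels whose GET_SUPPORTED_CPUID does not
 * report the KVM_CPUID_FEATURES leaf.
 */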
static int get_para_features(KVMState *s)
{
    int i, features = 0;

    for (i = 0; i < ARRAY_SIZE(para_features); i++) {
        if (kvm_check_extension(s, para_features[i].cap)) {
            features |= (1 << para_features[i].feature);
        }
    }

    return features;
}


/* Returns the value for a specific register on the cpuid entry
 */
static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg)
{
    uint32_t ret = 0;
    switch (reg) {
    case R_EAX:
        ret = entry->eax;
        break;
    case R_EBX:
        ret = entry->ebx;
        break;
    case R_ECX:
        ret = entry->ecx;
        break;
    case R_EDX:
        ret = entry->edx;
        break;
    }
    return ret;
}

/* Find matching entry for function/index on kvm_cpuid2 struct
 */
static struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid,
                                                 uint32_t function,
                                                 uint32_t index)
{
    int i;
    for (i = 0; i < cpuid->nent; ++i) {
        if (cpuid->entries[i].function == function &&
            cpuid->entries[i].index == index) {
            return &cpuid->entries[i];
        }
    }
    /* not found: */
    return NULL;
}

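/*
 * Return the feature bits KVM can virtualize for the given CPUID
 * function/index/register, with fixups applied below for bits that old
 * kernels misreport or that depend on the in-kernel irqchip.
 */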
uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
                                      uint32_t index, int reg)
{
    struct kvm_cpuid2 *cpuid;
    uint32_t ret = 0;
    uint32_t cpuid_1_edx;
    bool found = false;

    cpuid = get_supported_cpuid(s);

    struct kvm_cpuid_entry2 *entry = cpuid_find_entry(cpuid, function, index);
    if (entry) {
        found = true;
        ret = cpuid_entry_get_reg(entry, reg);
    }

    /* Fixups for the data returned by KVM, below */

    if (function == 1 && reg == R_EDX) {
        /* KVM before 2.6.30 misreports the following features */
        ret |= CPUID_MTRR | CPUID_PAT | CPUID_MCE | CPUID_MCA;
    } else if (function == 1 && reg == R_ECX) {
        /* We can set the hypervisor flag, even if KVM does not return it on
         * GET_SUPPORTED_CPUID
         */
        ret |= CPUID_EXT_HYPERVISOR;
        /* tsc-deadline flag is not returned by GET_SUPPORTED_CPUID, but it
         * can be enabled if the kernel has KVM_CAP_TSC_DEADLINE_TIMER,
         * and the irqchip is in the kernel.
         */
        if (kvm_irqchip_in_kernel() &&
            kvm_check_extension(s, KVM_CAP_TSC_DEADLINE_TIMER)) {
            ret |= CPUID_EXT_TSC_DEADLINE_TIMER;
        }

        /* x2apic is reported by GET_SUPPORTED_CPUID, but it can't be enabled
         * without the in-kernel irqchip
         */
        if (!kvm_irqchip_in_kernel()) {
            ret &= ~CPUID_EXT_X2APIC;
        }
    } else if (function == 6 && reg == R_EAX) {
        ret |= CPUID_6_EAX_ARAT; /* safe to allow because of emulated APIC */
    } else if (function == 0x80000001 && reg == R_EDX) {
        /* On Intel, kvm returns cpuid according to the Intel spec,
         * so add missing bits according to the AMD spec:
         */
        cpuid_1_edx = kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX);
        ret |= cpuid_1_edx & CPUID_EXT2_AMD_ALIASES;
    }

    g_free(cpuid);

    /* fallback for older kernels */
    if ((function == KVM_CPUID_FEATURES) && !found) {
        ret = get_para_features(s);
    }

    return ret;
}

typedef struct HWPoisonPage {
    ram_addr_t ram_addr;
    QLIST_ENTRY(HWPoisonPage) list;
} HWPoisonPage;

static QLIST_HEAD(, HWPoisonPage) hwpoison_page_list =
    QLIST_HEAD_INITIALIZER(hwpoison_page_list);

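/*
 * Reset handler: remap every guest page that was poisoned by a hardware
 * memory error, so a rebooted guest starts with clean RAM again
 * (registered in kvm_arch_init below).
 */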
static void kvm_unpoison_all(void *param)
{
    HWPoisonPage *page, *next_page;

    QLIST_FOREACH_SAFE(page, &hwpoison_page_list, list, next_page) {
        QLIST_REMOVE(page, list);
        qemu_ram_remap(page->ram_addr, TARGET_PAGE_SIZE);
        g_free(page);
    }
}

static void kvm_hwpoison_page_add(ram_addr_t ram_addr)
{
    HWPoisonPage *page;

    QLIST_FOREACH(page, &hwpoison_page_list, list) {
        if (page->ram_addr == ram_addr) {
            return;
        }
    }
    page = g_new(HWPoisonPage, 1);
    page->ram_addr = ram_addr;
    QLIST_INSERT_HEAD(&hwpoison_page_list, page, list);
}

static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap,
                                     int *max_banks)
{
    int r;

    r = kvm_check_extension(s, KVM_CAP_MCE);
    if (r > 0) {
        *max_banks = r;
        return kvm_ioctl(s, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap);
    }
    return -ENOSYS;
}

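/*
 * Build a synthetic machine-check event for the given guest physical
 * address and hand it to the kernel.  BUS_MCEERR_AR becomes an
 * action-required MCE (EIPV set), everything else an action-optional one.
 */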
static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code)
{
    CPUX86State *env = &cpu->env;
    uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
                      MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S;
    uint64_t mcg_status = MCG_STATUS_MCIP;

    if (code == BUS_MCEERR_AR) {
        status |= MCI_STATUS_AR | 0x134;
        mcg_status |= MCG_STATUS_EIPV;
    } else {
        status |= 0xc0;
        mcg_status |= MCG_STATUS_RIPV;
    }
    cpu_x86_inject_mce(NULL, cpu, 9, status, mcg_status, paddr,
                       (MCM_ADDR_PHYS << 6) | 0xc,
                       cpu_x86_support_mca_broadcast(env) ?
                       MCE_INJECT_BROADCAST : 0);
}

static void hardware_memory_error(void)
{
    fprintf(stderr, "Hardware memory error!\n");
    exit(1);
}

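/*
 * SIGBUS entry points: BUS_MCEERR_AR means the faulting access hit
 * already-poisoned memory (action required), BUS_MCEERR_AO is an
 * asynchronous notification (action optional).  With MCE recovery
 * (MCG_SER_P) available the error is forwarded to the guest as an MCE;
 * otherwise an AR error is fatal for QEMU.
 */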
int kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
{
    X86CPU *cpu = X86_CPU(c);
    CPUX86State *env = &cpu->env;
    ram_addr_t ram_addr;
    hwaddr paddr;

    if ((env->mcg_cap & MCG_SER_P) && addr
        && (code == BUS_MCEERR_AR || code == BUS_MCEERR_AO)) {
        if (qemu_ram_addr_from_host(addr, &ram_addr) == NULL ||
            !kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
            fprintf(stderr, "Hardware memory error for memory used by "
                    "QEMU itself instead of guest system!\n");
            /* Hope we are lucky for AO MCE */
            if (code == BUS_MCEERR_AO) {
                return 0;
            } else {
                hardware_memory_error();
            }
        }
        kvm_hwpoison_page_add(ram_addr);
        kvm_mce_inject(cpu, paddr, code);
    } else {
        if (code == BUS_MCEERR_AO) {
            return 0;
        } else if (code == BUS_MCEERR_AR) {
            hardware_memory_error();
        } else {
            return 1;
        }
    }
    return 0;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    X86CPU *cpu = X86_CPU(first_cpu);

    if ((cpu->env.mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) {
        ram_addr_t ram_addr;
        hwaddr paddr;

        /* Hope we are lucky for AO MCE */
        if (qemu_ram_addr_from_host(addr, &ram_addr) == NULL ||
            !kvm_physical_memory_addr_from_host(first_cpu->kvm_state,
                                                addr, &paddr)) {
            fprintf(stderr, "Hardware memory error for memory used by "
                    "QEMU itself instead of guest system!: %p\n", addr);
            return 0;
        }
        kvm_hwpoison_page_add(ram_addr);
        kvm_mce_inject(X86_CPU(first_cpu), paddr, code);
    } else {
        if (code == BUS_MCEERR_AO) {
            return 0;
        } else if (code == BUS_MCEERR_AR) {
            hardware_memory_error();
        } else {
            return 1;
        }
    }
    return 0;
}

static int kvm_inject_mce_oldstyle(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;

    if (!kvm_has_vcpu_events() && env->exception_injected == EXCP12_MCHK) {
        unsigned int bank, bank_num = env->mcg_cap & 0xff;
        struct kvm_x86_mce mce;

        env->exception_injected = -1;

        /*
         * There must be at least one bank in use if an MCE is pending.
         * Find it and use its values for the event injection.
         */
        for (bank = 0; bank < bank_num; bank++) {
            if (env->mce_banks[bank * 4 + 1] & MCI_STATUS_VAL) {
                break;
            }
        }
        assert(bank < bank_num);

        mce.bank = bank;
        mce.status = env->mce_banks[bank * 4 + 1];
        mce.mcg_status = env->mcg_status;
        mce.addr = env->mce_banks[bank * 4 + 2];
        mce.misc = env->mce_banks[bank * 4 + 3];

        return kvm_vcpu_ioctl(CPU(cpu), KVM_X86_SET_MCE, &mce);
    }
    return 0;
}

static void cpu_update_state(void *opaque, int running, RunState state)
{
    CPUX86State *env = opaque;

    if (running) {
        env->tsc_valid = false;
    }
}

unsigned long kvm_arch_vcpu_id(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    return cpu->apic_id;
}

#ifndef KVM_CPUID_SIGNATURE_NEXT
#define KVM_CPUID_SIGNATURE_NEXT                0x40000100
#endif

static bool hyperv_hypercall_available(X86CPU *cpu)
{
    return cpu->hyperv_vapic ||
           (cpu->hyperv_spinlock_attempts != HYPERV_SPINLOCK_NEVER_RETRY);
}

static bool hyperv_enabled(X86CPU *cpu)
{
    CPUState *cs = CPU(cpu);
    return kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV) > 0 &&
           (hyperv_hypercall_available(cpu) ||
            cpu->hyperv_time ||
            cpu->hyperv_relaxed_timing ||
            cpu->hyperv_crash ||
            cpu->hyperv_reset);
}

static Error *invtsc_mig_blocker;

#define KVM_MAX_CPUID_ENTRIES  100

int kvm_arch_init_vcpu(CPUState *cs)
{
    struct {
        struct kvm_cpuid2 cpuid;
        struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
    } QEMU_PACKED cpuid_data;
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    uint32_t limit, i, j, cpuid_i;
    uint32_t unused;
    struct kvm_cpuid_entry2 *c;
    uint32_t signature[3];
    int kvm_base = KVM_CPUID_SIGNATURE;
    int r;

    memset(&cpuid_data, 0, sizeof(cpuid_data));

    cpuid_i = 0;

    /* Paravirtualization CPUIDs */
    if (hyperv_enabled(cpu)) {
        c = &cpuid_data.entries[cpuid_i++];
        c->function = HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
        memcpy(signature, "Microsoft Hv", 12);
        c->eax = HYPERV_CPUID_MIN;
        c->ebx = signature[0];
        c->ecx = signature[1];
        c->edx = signature[2];

        c = &cpuid_data.entries[cpuid_i++];
        c->function = HYPERV_CPUID_INTERFACE;
        memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12);
        c->eax = signature[0];
        c->ebx = 0;
        c->ecx = 0;
        c->edx = 0;

        c = &cpuid_data.entries[cpuid_i++];
        c->function = HYPERV_CPUID_VERSION;
        c->eax = 0x00001bbc;
        c->ebx = 0x00060001;

        c = &cpuid_data.entries[cpuid_i++];
        c->function = HYPERV_CPUID_FEATURES;
        if (cpu->hyperv_relaxed_timing) {
            c->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE;
        }
        if (cpu->hyperv_vapic) {
            c->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE;
            c->eax |= HV_X64_MSR_APIC_ACCESS_AVAILABLE;
            has_msr_hv_vapic = true;
        }
        if (cpu->hyperv_time &&
            kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV_TIME) > 0) {
            c->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE;
            c->eax |= HV_X64_MSR_TIME_REF_COUNT_AVAILABLE;
            c->eax |= 0x200;
            has_msr_hv_tsc = true;
        }
        if (cpu->hyperv_crash && has_msr_hv_crash) {
            c->edx |= HV_X64_GUEST_CRASH_MSR_AVAILABLE;
        }
        if (cpu->hyperv_reset && has_msr_hv_reset) {
            c->eax |= HV_X64_MSR_RESET_AVAILABLE;
        }
        c = &cpuid_data.entries[cpuid_i++];
        c->function = HYPERV_CPUID_ENLIGHTMENT_INFO;
        if (cpu->hyperv_relaxed_timing) {
            c->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
        }
        if (has_msr_hv_vapic) {
            c->eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
        }
        c->ebx = cpu->hyperv_spinlock_attempts;

        c = &cpuid_data.entries[cpuid_i++];
        c->function = HYPERV_CPUID_IMPLEMENT_LIMITS;
        c->eax = 0x40;
        c->ebx = 0x40;

        kvm_base = KVM_CPUID_SIGNATURE_NEXT;
        has_msr_hv_hypercall = true;
    }

    if (cpu->expose_kvm) {
        memcpy(signature, "KVMKVMKVM\0\0\0", 12);
        c = &cpuid_data.entries[cpuid_i++];
        c->function = KVM_CPUID_SIGNATURE | kvm_base;
        c->eax = KVM_CPUID_FEATURES | kvm_base;
        c->ebx = signature[0];
        c->ecx = signature[1];
        c->edx = signature[2];

        c = &cpuid_data.entries[cpuid_i++];
        c->function = KVM_CPUID_FEATURES | kvm_base;
        c->eax = env->features[FEAT_KVM];

        has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF);

        has_msr_pv_eoi_en = c->eax & (1 << KVM_FEATURE_PV_EOI);

        has_msr_kvm_steal_time = c->eax & (1 << KVM_FEATURE_STEAL_TIME);
    }

    cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);

    for (i = 0; i <= limit; i++) {
        if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
            fprintf(stderr, "unsupported level value: 0x%x\n", limit);
            abort();
        }
        c = &cpuid_data.entries[cpuid_i++];

        switch (i) {
        case 2: {
            /* Keep reading function 2 till all the input is received */
            int times;

            c->function = i;
            c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
                       KVM_CPUID_FLAG_STATE_READ_NEXT;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            times = c->eax & 0xff;

            for (j = 1; j < times; ++j) {
                if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
                    fprintf(stderr, "cpuid_data is full, no space for "
                            "cpuid(eax:2):eax & 0xf = 0x%x\n", times);
                    abort();
                }
                c = &cpuid_data.entries[cpuid_i++];
                c->function = i;
                c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
                cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            }
            break;
        }
        case 4:
        case 0xb:
        case 0xd:
            for (j = 0; ; j++) {
                if (i == 0xd && j == 64) {
                    break;
                }
                c->function = i;
                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                c->index = j;
                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);

                if (i == 4 && c->eax == 0) {
                    break;
                }
                if (i == 0xb && !(c->ecx & 0xff00)) {
                    break;
                }
                if (i == 0xd && c->eax == 0) {
                    continue;
                }
                if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
                    fprintf(stderr, "cpuid_data is full, no space for "
                            "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
                    abort();
                }
                c = &cpuid_data.entries[cpuid_i++];
            }
            break;
        default:
            c->function = i;
            c->flags = 0;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            break;
        }
    }

    if (limit >= 0x0a) {
        uint32_t ver;

        cpu_x86_cpuid(env, 0x0a, 0, &ver, &unused, &unused, &unused);
        if ((ver & 0xff) > 0) {
            has_msr_architectural_pmu = true;
            num_architectural_pmu_counters = (ver & 0xff00) >> 8;

            /* Shouldn't be more than 32, since that's the number of bits
             * available in EBX to tell us _which_ counters are available.
             * Play it safe.
             */
            if (num_architectural_pmu_counters > MAX_GP_COUNTERS) {
                num_architectural_pmu_counters = MAX_GP_COUNTERS;
            }
        }
    }

    cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);

    for (i = 0x80000000; i <= limit; i++) {
        if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
            fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit);
            abort();
        }
        c = &cpuid_data.entries[cpuid_i++];

        c->function = i;
        c->flags = 0;
        cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
    }

    /* Call Centaur's CPUID instructions if they are supported. */
    if (env->cpuid_xlevel2 > 0) {
        cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused);

        for (i = 0xC0000000; i <= limit; i++) {
            if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
                fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit);
                abort();
            }
            c = &cpuid_data.entries[cpuid_i++];

            c->function = i;
            c->flags = 0;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
        }
    }

    cpuid_data.cpuid.nent = cpuid_i;

    if (((env->cpuid_version >> 8)&0xF) >= 6
        && (env->features[FEAT_1_EDX] & (CPUID_MCE | CPUID_MCA)) ==
           (CPUID_MCE | CPUID_MCA)
        && kvm_check_extension(cs->kvm_state, KVM_CAP_MCE) > 0) {
        uint64_t mcg_cap;
        int banks;
        int ret;

        ret = kvm_get_mce_cap_supported(cs->kvm_state, &mcg_cap, &banks);
        if (ret < 0) {
            fprintf(stderr, "kvm_get_mce_cap_supported: %s", strerror(-ret));
            return ret;
        }

        if (banks > MCE_BANKS_DEF) {
            banks = MCE_BANKS_DEF;
        }
        mcg_cap &= MCE_CAP_DEF;
        mcg_cap |= banks;
        ret = kvm_vcpu_ioctl(cs, KVM_X86_SETUP_MCE, &mcg_cap);
        if (ret < 0) {
            fprintf(stderr, "KVM_X86_SETUP_MCE: %s", strerror(-ret));
            return ret;
        }

        env->mcg_cap = mcg_cap;
    }

    qemu_add_vm_change_state_handler(cpu_update_state, env);

    c = cpuid_find_entry(&cpuid_data.cpuid, 1, 0);
    if (c) {
        has_msr_feature_control = !!(c->ecx & CPUID_EXT_VMX) ||
                                  !!(c->ecx & CPUID_EXT_SMX);
    }

    c = cpuid_find_entry(&cpuid_data.cpuid, 0x80000007, 0);
    if (c && (c->edx & 1<<8) && invtsc_mig_blocker == NULL) {
        /* for migration */
        error_setg(&invtsc_mig_blocker,
                   "State blocked by non-migratable CPU device"
                   " (invtsc flag)");
        migrate_add_blocker(invtsc_mig_blocker);
        /* for savevm */
        vmstate_x86_cpu.unmigratable = 1;
    }

    cpuid_data.cpuid.padding = 0;
    r = kvm_vcpu_ioctl(cs, KVM_SET_CPUID2, &cpuid_data);
    if (r) {
        return r;
    }

    r = kvm_check_extension(cs->kvm_state, KVM_CAP_TSC_CONTROL);
    if (r && env->tsc_khz) {
        r = kvm_vcpu_ioctl(cs, KVM_SET_TSC_KHZ, env->tsc_khz);
        if (r < 0) {
            fprintf(stderr, "KVM_SET_TSC_KHZ failed\n");
            return r;
        }
    }

    if (kvm_has_xsave()) {
        env->kvm_xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave));
    }

    if (env->features[FEAT_1_EDX] & CPUID_MTRR) {
        has_msr_mtrr = true;
    }

    return 0;
}

void kvm_arch_reset_vcpu(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;

    env->exception_injected = -1;
    env->interrupt_injected = -1;
    env->xcr0 = 1;
    if (kvm_irqchip_in_kernel()) {
        env->mp_state = cpu_is_bsp(cpu) ? KVM_MP_STATE_RUNNABLE :
                                          KVM_MP_STATE_UNINITIALIZED;
    } else {
        env->mp_state = KVM_MP_STATE_RUNNABLE;
    }
}

void kvm_arch_do_init_vcpu(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;

    /* APs get directly into wait-for-SIPI state.  */
    if (env->mp_state == KVM_MP_STATE_UNINITIALIZED) {
        env->mp_state = KVM_MP_STATE_INIT_RECEIVED;
    }
}

static int kvm_get_supported_msrs(KVMState *s)
{
    static int kvm_supported_msrs;
    int ret = 0;

    /* first time */
    if (kvm_supported_msrs == 0) {
        struct kvm_msr_list msr_list, *kvm_msr_list;

        kvm_supported_msrs = -1;

        /* Obtain MSR list from KVM.  These are the MSRs that we must
         * save/restore */
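        /* First query with nmsrs == 0: the kernel fills in the real count
         * and fails with -E2BIG, which tells us how much to allocate. */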
        msr_list.nmsrs = 0;
        ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, &msr_list);
        if (ret < 0 && ret != -E2BIG) {
            return ret;
        }
        /* Old kernel modules had a bug and could write beyond the provided
           memory. Allocate at least a safe amount of 1K. */
        kvm_msr_list = g_malloc0(MAX(1024, sizeof(msr_list) +
                                           msr_list.nmsrs *
                                           sizeof(msr_list.indices[0])));

        kvm_msr_list->nmsrs = msr_list.nmsrs;
        ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
        if (ret >= 0) {
            int i;

            for (i = 0; i < kvm_msr_list->nmsrs; i++) {
                if (kvm_msr_list->indices[i] == MSR_STAR) {
                    has_msr_star = true;
                    continue;
                }
                if (kvm_msr_list->indices[i] == MSR_VM_HSAVE_PA) {
                    has_msr_hsave_pa = true;
                    continue;
                }
                if (kvm_msr_list->indices[i] == MSR_TSC_AUX) {
                    has_msr_tsc_aux = true;
                    continue;
                }
                if (kvm_msr_list->indices[i] == MSR_TSC_ADJUST) {
                    has_msr_tsc_adjust = true;
                    continue;
                }
                if (kvm_msr_list->indices[i] == MSR_IA32_TSCDEADLINE) {
                    has_msr_tsc_deadline = true;
                    continue;
                }
                if (kvm_msr_list->indices[i] == MSR_IA32_SMBASE) {
                    has_msr_smbase = true;
                    continue;
                }
                if (kvm_msr_list->indices[i] == MSR_IA32_MISC_ENABLE) {
                    has_msr_misc_enable = true;
                    continue;
                }
                if (kvm_msr_list->indices[i] == MSR_IA32_BNDCFGS) {
                    has_msr_bndcfgs = true;
                    continue;
                }
                if (kvm_msr_list->indices[i] == MSR_IA32_XSS) {
                    has_msr_xss = true;
                    continue;
                }
                if (kvm_msr_list->indices[i] == HV_X64_MSR_CRASH_CTL) {
                    has_msr_hv_crash = true;
                    continue;
                }
                if (kvm_msr_list->indices[i] == HV_X64_MSR_RESET) {
                    has_msr_hv_reset = true;
                    continue;
                }
            }
        }

        g_free(kvm_msr_list);
    }

    return ret;
}

static Notifier smram_machine_done;
static KVMMemoryListener smram_listener;
static AddressSpace smram_address_space;
static MemoryRegion smram_as_root;
static MemoryRegion smram_as_mem;

static void register_smram_listener(Notifier *n, void *unused)
{
    MemoryRegion *smram =
        (MemoryRegion *) object_resolve_path("/machine/smram", NULL);

    /* Outer container... */
    memory_region_init(&smram_as_root, OBJECT(kvm_state), "mem-container-smram", ~0ull);
    memory_region_set_enabled(&smram_as_root, true);

    /* ... with two regions inside: normal system memory with low
     * priority, and...
     */
    memory_region_init_alias(&smram_as_mem, OBJECT(kvm_state), "mem-smram",
                             get_system_memory(), 0, ~0ull);
    memory_region_add_subregion_overlap(&smram_as_root, 0, &smram_as_mem, 0);
    memory_region_set_enabled(&smram_as_mem, true);

    if (smram) {
        /* ... SMRAM with higher priority */
        memory_region_add_subregion_overlap(&smram_as_root, 0, smram, 10);
        memory_region_set_enabled(smram, true);
    }

    address_space_init(&smram_address_space, &smram_as_root, "KVM-SMRAM");
    kvm_memory_listener_register(kvm_state, &smram_listener,
                                 &smram_address_space, 1);
}

int kvm_arch_init(MachineState *ms, KVMState *s)
{
    uint64_t identity_base = 0xfffbc000;
    uint64_t shadow_mem;
    int ret;
    struct utsname utsname;

    ret = kvm_get_supported_msrs(s);
    if (ret < 0) {
        return ret;
    }

    uname(&utsname);
    lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0;

    /*
     * On older Intel CPUs, KVM uses vm86 mode to emulate 16-bit code directly.
     * In order to use vm86 mode, an EPT identity map and a TSS are needed.
     * Since these must be part of guest physical memory, we need to allocate
     * them, both by setting their start addresses in the kernel and by
     * creating a corresponding e820 entry. We need 4 pages before the BIOS.
     *
     * Older KVM versions may not support setting the identity map base. In
     * that case we need to stick with the default, i.e. a 256K maximum BIOS
     * size.
     */
    if (kvm_check_extension(s, KVM_CAP_SET_IDENTITY_MAP_ADDR)) {
        /* Allows up to 16M BIOSes. */
        identity_base = 0xfeffc000;

        ret = kvm_vm_ioctl(s, KVM_SET_IDENTITY_MAP_ADDR, &identity_base);
        if (ret < 0) {
            return ret;
        }
    }

    /* Set TSS base one page after EPT identity map. */
    ret = kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, identity_base + 0x1000);
    if (ret < 0) {
        return ret;
    }

    /* Tell fw_cfg to notify the BIOS to reserve the range. */
    ret = e820_add_entry(identity_base, 0x4000, E820_RESERVED);
    if (ret < 0) {
        fprintf(stderr, "e820_add_entry() table is full\n");
        return ret;
    }
    qemu_register_reset(kvm_unpoison_all, NULL);

    shadow_mem = machine_kvm_shadow_mem(ms);
    if (shadow_mem != -1) {
        shadow_mem /= 4096;
        ret = kvm_vm_ioctl(s, KVM_SET_NR_MMU_PAGES, shadow_mem);
        if (ret < 0) {
            return ret;
        }
    }

    if (kvm_check_extension(s, KVM_CAP_X86_SMM)) {
        smram_machine_done.notify = register_smram_listener;
        qemu_add_machine_init_done_notifier(&smram_machine_done);
    }
    return 0;
}

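/*
 * Helpers converting between QEMU's SegmentCache and KVM's struct
 * kvm_segment.  In vm86 mode every segment is forced to the fixed
 * ring-3, 16-bit data layout that set_v8086_seg produces.
 */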
static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = 3;
    lhs->present = 1;
    lhs->dpl = 3;
    lhs->db = 0;
    lhs->s = 1;
    lhs->l = 0;
    lhs->g = 0;
    lhs->avl = 0;
    lhs->unusable = 0;
}

static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
{
    unsigned flags = rhs->flags;
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
    lhs->present = (flags & DESC_P_MASK) != 0;
    lhs->dpl = (flags >> DESC_DPL_SHIFT) & 3;
    lhs->db = (flags >> DESC_B_SHIFT) & 1;
    lhs->s = (flags & DESC_S_MASK) != 0;
    lhs->l = (flags >> DESC_L_SHIFT) & 1;
    lhs->g = (flags & DESC_G_MASK) != 0;
    lhs->avl = (flags & DESC_AVL_MASK) != 0;
    lhs->unusable = 0;
    lhs->padding = 0;
}

static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->flags = (rhs->type << DESC_TYPE_SHIFT) |
                 (rhs->present * DESC_P_MASK) |
                 (rhs->dpl << DESC_DPL_SHIFT) |
                 (rhs->db << DESC_B_SHIFT) |
                 (rhs->s * DESC_S_MASK) |
                 (rhs->l << DESC_L_SHIFT) |
                 (rhs->g * DESC_G_MASK) |
                 (rhs->avl * DESC_AVL_MASK);
}

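/*
 * Copy a single register between the QEMU and KVM register images;
 * 'set' selects the direction (QEMU -> KVM when non-zero), so the same
 * code path below can both load and store the whole GPR file.
 */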
static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set)
{
    if (set) {
        *kvm_reg = *qemu_reg;
    } else {
        *qemu_reg = *kvm_reg;
    }
}

static int kvm_getput_regs(X86CPU *cpu, int set)
{
    CPUX86State *env = &cpu->env;
    struct kvm_regs regs;
    int ret = 0;

    if (!set) {
        ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_REGS, &regs);
        if (ret < 0) {
            return ret;
        }
    }

    kvm_getput_reg(&regs.rax, &env->regs[R_EAX], set);
    kvm_getput_reg(&regs.rbx, &env->regs[R_EBX], set);
    kvm_getput_reg(&regs.rcx, &env->regs[R_ECX], set);
    kvm_getput_reg(&regs.rdx, &env->regs[R_EDX], set);
    kvm_getput_reg(&regs.rsi, &env->regs[R_ESI], set);
    kvm_getput_reg(&regs.rdi, &env->regs[R_EDI], set);
    kvm_getput_reg(&regs.rsp, &env->regs[R_ESP], set);
    kvm_getput_reg(&regs.rbp, &env->regs[R_EBP], set);
#ifdef TARGET_X86_64
    kvm_getput_reg(&regs.r8, &env->regs[8], set);
    kvm_getput_reg(&regs.r9, &env->regs[9], set);
    kvm_getput_reg(&regs.r10, &env->regs[10], set);
    kvm_getput_reg(&regs.r11, &env->regs[11], set);
    kvm_getput_reg(&regs.r12, &env->regs[12], set);
    kvm_getput_reg(&regs.r13, &env->regs[13], set);
    kvm_getput_reg(&regs.r14, &env->regs[14], set);
    kvm_getput_reg(&regs.r15, &env->regs[15], set);
#endif

    kvm_getput_reg(&regs.rflags, &env->eflags, set);
    kvm_getput_reg(&regs.rip, &env->eip, set);

    if (set) {
        ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_REGS, &regs);
    }

    return ret;
}

static int kvm_put_fpu(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_fpu fpu;
    int i;

    memset(&fpu, 0, sizeof fpu);
    fpu.fsw = env->fpus & ~(7 << 11);
    fpu.fsw |= (env->fpstt & 7) << 11;
    fpu.fcw = env->fpuc;
    fpu.last_opcode = env->fpop;
    fpu.last_ip = env->fpip;
    fpu.last_dp = env->fpdp;
    for (i = 0; i < 8; ++i) {
        fpu.ftwx |= (!env->fptags[i]) << i;
    }
    memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
    for (i = 0; i < CPU_NB_REGS; i++) {
        stq_p(&fpu.xmm[i][0], env->xmm_regs[i].XMM_Q(0));
        stq_p(&fpu.xmm[i][8], env->xmm_regs[i].XMM_Q(1));
    }
    fpu.mxcsr = env->mxcsr;

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_FPU, &fpu);
}

#define XSAVE_FCW_FSW     0
#define XSAVE_FTW_FOP     1
#define XSAVE_CWD_RIP     2
#define XSAVE_CWD_RDP     4
#define XSAVE_MXCSR       6
#define XSAVE_ST_SPACE    8
#define XSAVE_XMM_SPACE   40
#define XSAVE_XSTATE_BV   128
#define XSAVE_YMMH_SPACE  144
#define XSAVE_BNDREGS     240
#define XSAVE_BNDCSR      256
#define XSAVE_OPMASK      272
#define XSAVE_ZMM_Hi256   288
#define XSAVE_Hi16_ZMM    416

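/*
 * The XSAVE_* constants above index into kvm_xsave.region[], an array
 * of 32-bit words, i.e. they are byte offsets into the hardware XSAVE
 * image divided by four.
 */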
static int kvm_put_xsave(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_xsave* xsave = env->kvm_xsave_buf;
    uint16_t cwd, swd, twd;
    uint8_t *xmm, *ymmh, *zmmh;
    int i, r;

    if (!kvm_has_xsave()) {
        return kvm_put_fpu(cpu);
    }

    memset(xsave, 0, sizeof(struct kvm_xsave));
    twd = 0;
    swd = env->fpus & ~(7 << 11);
    swd |= (env->fpstt & 7) << 11;
    cwd = env->fpuc;
    for (i = 0; i < 8; ++i) {
        twd |= (!env->fptags[i]) << i;
    }
    xsave->region[XSAVE_FCW_FSW] = (uint32_t)(swd << 16) + cwd;
    xsave->region[XSAVE_FTW_FOP] = (uint32_t)(env->fpop << 16) + twd;
    memcpy(&xsave->region[XSAVE_CWD_RIP], &env->fpip, sizeof(env->fpip));
    memcpy(&xsave->region[XSAVE_CWD_RDP], &env->fpdp, sizeof(env->fpdp));
    memcpy(&xsave->region[XSAVE_ST_SPACE], env->fpregs,
            sizeof env->fpregs);
    xsave->region[XSAVE_MXCSR] = env->mxcsr;
    *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV] = env->xstate_bv;
    memcpy(&xsave->region[XSAVE_BNDREGS], env->bnd_regs,
            sizeof env->bnd_regs);
    memcpy(&xsave->region[XSAVE_BNDCSR], &env->bndcs_regs,
            sizeof(env->bndcs_regs));
    memcpy(&xsave->region[XSAVE_OPMASK], env->opmask_regs,
            sizeof env->opmask_regs);

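    /* Each vector register is scattered over the XSAVE image: bits 0-127
     * live in the legacy XMM area, bits 128-255 in the YMMH area and
     * bits 256-511 in the ZMM_Hi256 area. */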
    xmm = (uint8_t *)&xsave->region[XSAVE_XMM_SPACE];
    ymmh = (uint8_t *)&xsave->region[XSAVE_YMMH_SPACE];
    zmmh = (uint8_t *)&xsave->region[XSAVE_ZMM_Hi256];
    for (i = 0; i < CPU_NB_REGS; i++, xmm += 16, ymmh += 16, zmmh += 32) {
        stq_p(xmm, env->xmm_regs[i].XMM_Q(0));
        stq_p(xmm+8, env->xmm_regs[i].XMM_Q(1));
        stq_p(ymmh, env->xmm_regs[i].XMM_Q(2));
        stq_p(ymmh+8, env->xmm_regs[i].XMM_Q(3));
        stq_p(zmmh, env->xmm_regs[i].XMM_Q(4));
        stq_p(zmmh+8, env->xmm_regs[i].XMM_Q(5));
        stq_p(zmmh+16, env->xmm_regs[i].XMM_Q(6));
        stq_p(zmmh+24, env->xmm_regs[i].XMM_Q(7));
    }

#ifdef TARGET_X86_64
    memcpy(&xsave->region[XSAVE_Hi16_ZMM], &env->xmm_regs[16],
            16 * sizeof env->xmm_regs[16]);
#endif
    r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave);
    return r;
}

static int kvm_put_xcrs(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_xcrs xcrs = {};

    if (!kvm_has_xcrs()) {
        return 0;
    }

    xcrs.nr_xcrs = 1;
    xcrs.flags = 0;
    xcrs.xcrs[0].xcr = 0;
    xcrs.xcrs[0].value = env->xcr0;
    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XCRS, &xcrs);
}

static int kvm_put_sregs(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_sregs sregs;

    memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));
    if (env->interrupt_injected >= 0) {
        sregs.interrupt_bitmap[env->interrupt_injected / 64] |=
                (uint64_t)1 << (env->interrupt_injected % 64);
    }

    if ((env->eflags & VM_MASK)) {
        set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
        set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
        set_v8086_seg(&sregs.es, &env->segs[R_ES]);
        set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
        set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
        set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
    } else {
        set_seg(&sregs.cs, &env->segs[R_CS]);
        set_seg(&sregs.ds, &env->segs[R_DS]);
        set_seg(&sregs.es, &env->segs[R_ES]);
        set_seg(&sregs.fs, &env->segs[R_FS]);
        set_seg(&sregs.gs, &env->segs[R_GS]);
        set_seg(&sregs.ss, &env->segs[R_SS]);
    }

    set_seg(&sregs.tr, &env->tr);
    set_seg(&sregs.ldt, &env->ldt);

    sregs.idt.limit = env->idt.limit;
    sregs.idt.base = env->idt.base;
    memset(sregs.idt.padding, 0, sizeof sregs.idt.padding);
    sregs.gdt.limit = env->gdt.limit;
    sregs.gdt.base = env->gdt.base;
    memset(sregs.gdt.padding, 0, sizeof sregs.gdt.padding);

    sregs.cr0 = env->cr[0];
    sregs.cr2 = env->cr[2];
    sregs.cr3 = env->cr[3];
    sregs.cr4 = env->cr[4];

    sregs.cr8 = cpu_get_apic_tpr(cpu->apic_state);
    sregs.apic_base = cpu_get_apic_base(cpu->apic_state);

    sregs.efer = env->efer;

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
}

static void kvm_msr_entry_set(struct kvm_msr_entry *entry,
                              uint32_t index, uint64_t value)
{
    entry->index = index;
    entry->reserved = 0;
    entry->data = value;
}

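/*
 * The TSC deadline MSR is written on its own, separately from the bulk
 * MSR list in kvm_put_msrs(), so callers can order it relative to the
 * rest of the vcpu state.
 */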
static int kvm_put_tscdeadline_msr(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[1];
    } msr_data;
    struct kvm_msr_entry *msrs = msr_data.entries;

    if (!has_msr_tsc_deadline) {
        return 0;
    }

    kvm_msr_entry_set(&msrs[0], MSR_IA32_TSCDEADLINE, env->tsc_deadline);

    msr_data.info = (struct kvm_msrs) {
        .nmsrs = 1,
    };

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data);
}

/*
 * Provide a separate write service for the feature control MSR in order to
 * kick the VCPU out of VMXON or even guest mode on reset. This has to be done
 * before writing any other state because forcibly leaving nested mode
 * invalidates the VCPU state.
 */
static int kvm_put_msr_feature_control(X86CPU *cpu)
{
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entry;
    } msr_data;

    kvm_msr_entry_set(&msr_data.entry, MSR_IA32_FEATURE_CONTROL,
                      cpu->env.msr_ia32_feature_control);

    msr_data.info = (struct kvm_msrs) {
        .nmsrs = 1,
    };

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data);
}

static int kvm_put_msrs(X86CPU *cpu, int level)
{
    CPUX86State *env = &cpu->env;
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[150];
    } msr_data;
    struct kvm_msr_entry *msrs = msr_data.entries;
    int n = 0, i;

    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    kvm_msr_entry_set(&msrs[n++], MSR_PAT, env->pat);
    if (has_msr_star) {
        kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
    }
    if (has_msr_hsave_pa) {
        kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave);
    }
    if (has_msr_tsc_aux) {
        kvm_msr_entry_set(&msrs[n++], MSR_TSC_AUX, env->tsc_aux);
    }
    if (has_msr_tsc_adjust) {
        kvm_msr_entry_set(&msrs[n++], MSR_TSC_ADJUST, env->tsc_adjust);
    }
    if (has_msr_misc_enable) {
        kvm_msr_entry_set(&msrs[n++], MSR_IA32_MISC_ENABLE,
                          env->msr_ia32_misc_enable);
    }
    if (has_msr_smbase) {
        kvm_msr_entry_set(&msrs[n++], MSR_IA32_SMBASE, env->smbase);
    }
    if (has_msr_bndcfgs) {
        kvm_msr_entry_set(&msrs[n++], MSR_IA32_BNDCFGS, env->msr_bndcfgs);
    }
    if (has_msr_xss) {
        kvm_msr_entry_set(&msrs[n++], MSR_IA32_XSS, env->xss);
    }
#ifdef TARGET_X86_64
    if (lm_capable_kernel) {
        kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
        kvm_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
        kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
        kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
    }
#endif
    /*
     * The following MSRs have side effects on the guest or are too heavy
     * for normal writeback. Limit them to reset or full state updates.
     */
    if (level >= KVM_PUT_RESET_STATE) {
        kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
        kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME,
                          env->system_time_msr);
        kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
        if (has_msr_async_pf_en) {
            kvm_msr_entry_set(&msrs[n++], MSR_KVM_ASYNC_PF_EN,
                              env->async_pf_en_msr);
        }
        if (has_msr_pv_eoi_en) {
            kvm_msr_entry_set(&msrs[n++], MSR_KVM_PV_EOI_EN,
                              env->pv_eoi_en_msr);
        }
        if (has_msr_kvm_steal_time) {
            kvm_msr_entry_set(&msrs[n++], MSR_KVM_STEAL_TIME,
                              env->steal_time_msr);
        }
        if (has_msr_architectural_pmu) {
            /* Stop the counter.  */
            kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
            kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_GLOBAL_CTRL, 0);

            /* Set the counter values.  */
            for (i = 0; i < MAX_FIXED_COUNTERS; i++) {
                kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_FIXED_CTR0 + i,
                                  env->msr_fixed_counters[i]);
            }
            for (i = 0; i < num_architectural_pmu_counters; i++) {
                kvm_msr_entry_set(&msrs[n++], MSR_P6_PERFCTR0 + i,
                                  env->msr_gp_counters[i]);
                kvm_msr_entry_set(&msrs[n++], MSR_P6_EVNTSEL0 + i,
                                  env->msr_gp_evtsel[i]);
            }
            kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_GLOBAL_STATUS,
                              env->msr_global_status);
            kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_GLOBAL_OVF_CTRL,
                              env->msr_global_ovf_ctrl);

            /* Now start the PMU.  */
            kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_FIXED_CTR_CTRL,
                              env->msr_fixed_ctr_ctrl);
            kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_GLOBAL_CTRL,
                              env->msr_global_ctrl);
        }
        if (has_msr_hv_hypercall) {
            kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_GUEST_OS_ID,
                              env->msr_hv_guest_os_id);
            kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_HYPERCALL,
                              env->msr_hv_hypercall);
        }
        if (has_msr_hv_vapic) {
            kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_APIC_ASSIST_PAGE,
                              env->msr_hv_vapic);
        }
        if (has_msr_hv_tsc) {
            kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_REFERENCE_TSC,
                              env->msr_hv_tsc);
        }
        if (has_msr_hv_crash) {
            int j;

            for (j = 0; j < HV_X64_MSR_CRASH_PARAMS; j++)
                kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_CRASH_P0 + j,
                                  env->msr_hv_crash_params[j]);

            kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_CRASH_CTL,
                              HV_X64_MSR_CRASH_CTL_NOTIFY);
        }
        if (has_msr_mtrr) {
            kvm_msr_entry_set(&msrs[n++], MSR_MTRRdefType, env->mtrr_deftype);
            kvm_msr_entry_set(&msrs[n++],
                              MSR_MTRRfix64K_00000, env->mtrr_fixed[0]);
            kvm_msr_entry_set(&msrs[n++],
                              MSR_MTRRfix16K_80000, env->mtrr_fixed[1]);
            kvm_msr_entry_set(&msrs[n++],
                              MSR_MTRRfix16K_A0000, env->mtrr_fixed[2]);
            kvm_msr_entry_set(&msrs[n++],
                              MSR_MTRRfix4K_C0000, env->mtrr_fixed[3]);
            kvm_msr_entry_set(&msrs[n++],
                              MSR_MTRRfix4K_C8000, env->mtrr_fixed[4]);
            kvm_msr_entry_set(&msrs[n++],
                              MSR_MTRRfix4K_D0000, env->mtrr_fixed[5]);
            kvm_msr_entry_set(&msrs[n++],
                              MSR_MTRRfix4K_D8000, env->mtrr_fixed[6]);
            kvm_msr_entry_set(&msrs[n++],
                              MSR_MTRRfix4K_E0000, env->mtrr_fixed[7]);
            kvm_msr_entry_set(&msrs[n++],
                              MSR_MTRRfix4K_E8000, env->mtrr_fixed[8]);
            kvm_msr_entry_set(&msrs[n++],
                              MSR_MTRRfix4K_F0000, env->mtrr_fixed[9]);
            kvm_msr_entry_set(&msrs[n++],
                              MSR_MTRRfix4K_F8000, env->mtrr_fixed[10]);
            for (i = 0; i < MSR_MTRRcap_VCNT; i++) {
                kvm_msr_entry_set(&msrs[n++],
                                  MSR_MTRRphysBase(i), env->mtrr_var[i].base);
                kvm_msr_entry_set(&msrs[n++],
                                  MSR_MTRRphysMask(i), env->mtrr_var[i].mask);
            }
        }

        /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see
         *       kvm_put_msr_feature_control. */
    }
    if (env->mcg_cap) {
        int i;

        kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
        kvm_msr_entry_set(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl);
        for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) {
            kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]);
        }
    }

    msr_data.info = (struct kvm_msrs) {
        .nmsrs = n,
    };

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data);

}


static int kvm_get_fpu(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_fpu fpu;
    int i, ret;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_FPU, &fpu);
    if (ret < 0) {
        return ret;
    }

    env->fpstt = (fpu.fsw >> 11) & 7;
    env->fpus = fpu.fsw;
    env->fpuc = fpu.fcw;
    env->fpop = fpu.last_opcode;
    env->fpip = fpu.last_ip;
    env->fpdp = fpu.last_dp;
    for (i = 0; i < 8; ++i) {
        env->fptags[i] = !((fpu.ftwx >> i) & 1);
    }
    memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
    for (i = 0; i < CPU_NB_REGS; i++) {
        env->xmm_regs[i].XMM_Q(0) = ldq_p(&fpu.xmm[i][0]);
        env->xmm_regs[i].XMM_Q(1) = ldq_p(&fpu.xmm[i][8]);
    }
    env->mxcsr = fpu.mxcsr;

    return 0;
}

static int kvm_get_xsave(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_xsave* xsave = env->kvm_xsave_buf;
    int ret, i;
    const uint8_t *xmm, *ymmh, *zmmh;
    uint16_t cwd, swd, twd;

    if (!kvm_has_xsave()) {
        return kvm_get_fpu(cpu);
    }

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave);
    if (ret < 0) {
        return ret;
    }

    cwd = (uint16_t)xsave->region[XSAVE_FCW_FSW];
    swd = (uint16_t)(xsave->region[XSAVE_FCW_FSW] >> 16);
    twd = (uint16_t)xsave->region[XSAVE_FTW_FOP];
    env->fpop = (uint16_t)(xsave->region[XSAVE_FTW_FOP] >> 16);
    env->fpstt = (swd >> 11) & 7;
    env->fpus = swd;
    env->fpuc = cwd;
    for (i = 0; i < 8; ++i) {
        env->fptags[i] = !((twd >> i) & 1);
    }
    memcpy(&env->fpip, &xsave->region[XSAVE_CWD_RIP], sizeof(env->fpip));
    memcpy(&env->fpdp, &xsave->region[XSAVE_CWD_RDP], sizeof(env->fpdp));
    env->mxcsr = xsave->region[XSAVE_MXCSR];
    memcpy(env->fpregs, &xsave->region[XSAVE_ST_SPACE],
            sizeof env->fpregs);
    env->xstate_bv = *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV];
    memcpy(env->bnd_regs, &xsave->region[XSAVE_BNDREGS],
            sizeof env->bnd_regs);
    memcpy(&env->bndcs_regs, &xsave->region[XSAVE_BNDCSR],
            sizeof(env->bndcs_regs));
    memcpy(env->opmask_regs, &xsave->region[XSAVE_OPMASK],
            sizeof env->opmask_regs);

    xmm = (const uint8_t *)&xsave->region[XSAVE_XMM_SPACE];
    ymmh = (const uint8_t *)&xsave->region[XSAVE_YMMH_SPACE];
    zmmh = (const uint8_t *)&xsave->region[XSAVE_ZMM_Hi256];
    for (i = 0; i < CPU_NB_REGS; i++, xmm += 16, ymmh += 16, zmmh += 32) {
        env->xmm_regs[i].XMM_Q(0) = ldq_p(xmm);
        env->xmm_regs[i].XMM_Q(1) = ldq_p(xmm+8);
        env->xmm_regs[i].XMM_Q(2) = ldq_p(ymmh);
        env->xmm_regs[i].XMM_Q(3) = ldq_p(ymmh+8);
        env->xmm_regs[i].XMM_Q(4) = ldq_p(zmmh);
        env->xmm_regs[i].XMM_Q(5) = ldq_p(zmmh+8);
        env->xmm_regs[i].XMM_Q(6) = ldq_p(zmmh+16);
        env->xmm_regs[i].XMM_Q(7) = ldq_p(zmmh+24);
    }

#ifdef TARGET_X86_64
    memcpy(&env->xmm_regs[16], &xsave->region[XSAVE_Hi16_ZMM],
           16 * sizeof env->xmm_regs[16]);
#endif
    return 0;
}

static int kvm_get_xcrs(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    int i, ret;
    struct kvm_xcrs xcrs;

    if (!kvm_has_xcrs()) {
        return 0;
    }

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XCRS, &xcrs);
    if (ret < 0) {
        return ret;
    }

    for (i = 0; i < xcrs.nr_xcrs; i++) {
        /* Only support xcr0 now */
        if (xcrs.xcrs[i].xcr == 0) {
            env->xcr0 = xcrs.xcrs[i].value;
            break;
        }
    }
    return 0;
}

static int kvm_get_sregs(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_sregs sregs;
    uint32_t hflags;
    int bit, i, ret;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    /* There can only be one pending IRQ set in the bitmap at a time, so try
       to find it and save its number instead (-1 for none). */
    env->interrupt_injected = -1;
    for (i = 0; i < ARRAY_SIZE(sregs.interrupt_bitmap); i++) {
        if (sregs.interrupt_bitmap[i]) {
            bit = ctz64(sregs.interrupt_bitmap[i]);
            env->interrupt_injected = i * 64 + bit;
            break;
        }
    }

    get_seg(&env->segs[R_CS], &sregs.cs);
    get_seg(&env->segs[R_DS], &sregs.ds);
    get_seg(&env->segs[R_ES], &sregs.es);
    get_seg(&env->segs[R_FS], &sregs.fs);
    get_seg(&env->segs[R_GS], &sregs.gs);
    get_seg(&env->segs[R_SS], &sregs.ss);

    get_seg(&env->tr, &sregs.tr);
    get_seg(&env->ldt, &sregs.ldt);

    env->idt.limit = sregs.idt.limit;
    env->idt.base = sregs.idt.base;
    env->gdt.limit = sregs.gdt.limit;
    env->gdt.base = sregs.gdt.base;

    env->cr[0] = sregs.cr0;
    env->cr[2] = sregs.cr2;
    env->cr[3] = sregs.cr3;
    env->cr[4] = sregs.cr4;

    env->efer = sregs.efer;

    /* changes to apic base and cr8/tpr are read back via kvm_arch_post_run */

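    /* Recompute hflags: the bits cleared by HFLAG_COPY_MASK are derived
     * from the segment descriptors and control registers fetched above,
     * while the remaining bits keep their previous value. */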
b9bec74b
JK
1631#define HFLAG_COPY_MASK \
1632 ~( HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
1633 HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
1634 HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
1635 HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
05330448 1636
7125c937 1637 hflags = (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
05330448
AL
1638 hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
1639 hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
b9bec74b 1640 (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
05330448
AL
1641 hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
1642 hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
b9bec74b 1643 (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);
05330448
AL
1644
1645 if (env->efer & MSR_EFER_LMA) {
1646 hflags |= HF_LMA_MASK;
1647 }
1648
1649 if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
1650 hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
1651 } else {
1652 hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
b9bec74b 1653 (DESC_B_SHIFT - HF_CS32_SHIFT);
05330448 1654 hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
b9bec74b
JK
1655 (DESC_B_SHIFT - HF_SS32_SHIFT);
1656 if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK) ||
1657 !(hflags & HF_CS32_MASK)) {
1658 hflags |= HF_ADDSEG_MASK;
1659 } else {
1660 hflags |= ((env->segs[R_DS].base | env->segs[R_ES].base |
1661 env->segs[R_SS].base) != 0) << HF_ADDSEG_SHIFT;
1662 }
05330448
AL
1663 }
1664 env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
05330448
AL
1665
1666 return 0;
1667}
1668
static int kvm_get_msrs(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[150];
    } msr_data;
    struct kvm_msr_entry *msrs = msr_data.entries;
    int ret, i, n;

    n = 0;
    msrs[n++].index = MSR_IA32_SYSENTER_CS;
    msrs[n++].index = MSR_IA32_SYSENTER_ESP;
    msrs[n++].index = MSR_IA32_SYSENTER_EIP;
    msrs[n++].index = MSR_PAT;
    if (has_msr_star) {
        msrs[n++].index = MSR_STAR;
    }
    if (has_msr_hsave_pa) {
        msrs[n++].index = MSR_VM_HSAVE_PA;
    }
    if (has_msr_tsc_aux) {
        msrs[n++].index = MSR_TSC_AUX;
    }
    if (has_msr_tsc_adjust) {
        msrs[n++].index = MSR_TSC_ADJUST;
    }
    if (has_msr_tsc_deadline) {
        msrs[n++].index = MSR_IA32_TSCDEADLINE;
    }
    if (has_msr_misc_enable) {
        msrs[n++].index = MSR_IA32_MISC_ENABLE;
    }
    if (has_msr_smbase) {
        msrs[n++].index = MSR_IA32_SMBASE;
    }
    if (has_msr_feature_control) {
        msrs[n++].index = MSR_IA32_FEATURE_CONTROL;
    }
    if (has_msr_bndcfgs) {
        msrs[n++].index = MSR_IA32_BNDCFGS;
    }
    if (has_msr_xss) {
        msrs[n++].index = MSR_IA32_XSS;
    }

    if (!env->tsc_valid) {
        msrs[n++].index = MSR_IA32_TSC;
        env->tsc_valid = !runstate_is_running();
    }

#ifdef TARGET_X86_64
    if (lm_capable_kernel) {
        msrs[n++].index = MSR_CSTAR;
        msrs[n++].index = MSR_KERNELGSBASE;
        msrs[n++].index = MSR_FMASK;
        msrs[n++].index = MSR_LSTAR;
    }
#endif
    msrs[n++].index = MSR_KVM_SYSTEM_TIME;
    msrs[n++].index = MSR_KVM_WALL_CLOCK;
    if (has_msr_async_pf_en) {
        msrs[n++].index = MSR_KVM_ASYNC_PF_EN;
    }
    if (has_msr_pv_eoi_en) {
        msrs[n++].index = MSR_KVM_PV_EOI_EN;
    }
    if (has_msr_kvm_steal_time) {
        msrs[n++].index = MSR_KVM_STEAL_TIME;
    }
    if (has_msr_architectural_pmu) {
        msrs[n++].index = MSR_CORE_PERF_FIXED_CTR_CTRL;
        msrs[n++].index = MSR_CORE_PERF_GLOBAL_CTRL;
        msrs[n++].index = MSR_CORE_PERF_GLOBAL_STATUS;
        msrs[n++].index = MSR_CORE_PERF_GLOBAL_OVF_CTRL;
        for (i = 0; i < MAX_FIXED_COUNTERS; i++) {
            msrs[n++].index = MSR_CORE_PERF_FIXED_CTR0 + i;
        }
        for (i = 0; i < num_architectural_pmu_counters; i++) {
            msrs[n++].index = MSR_P6_PERFCTR0 + i;
            msrs[n++].index = MSR_P6_EVNTSEL0 + i;
        }
    }

    if (env->mcg_cap) {
        msrs[n++].index = MSR_MCG_STATUS;
        msrs[n++].index = MSR_MCG_CTL;
        for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) {
            msrs[n++].index = MSR_MC0_CTL + i;
        }
    }

    if (has_msr_hv_hypercall) {
        msrs[n++].index = HV_X64_MSR_HYPERCALL;
        msrs[n++].index = HV_X64_MSR_GUEST_OS_ID;
    }
    if (has_msr_hv_vapic) {
        msrs[n++].index = HV_X64_MSR_APIC_ASSIST_PAGE;
    }
    if (has_msr_hv_tsc) {
        msrs[n++].index = HV_X64_MSR_REFERENCE_TSC;
    }
    if (has_msr_hv_crash) {
        int j;

        for (j = 0; j < HV_X64_MSR_CRASH_PARAMS; j++) {
            msrs[n++].index = HV_X64_MSR_CRASH_P0 + j;
        }
    }
    if (has_msr_mtrr) {
        msrs[n++].index = MSR_MTRRdefType;
        msrs[n++].index = MSR_MTRRfix64K_00000;
        msrs[n++].index = MSR_MTRRfix16K_80000;
        msrs[n++].index = MSR_MTRRfix16K_A0000;
        msrs[n++].index = MSR_MTRRfix4K_C0000;
        msrs[n++].index = MSR_MTRRfix4K_C8000;
        msrs[n++].index = MSR_MTRRfix4K_D0000;
        msrs[n++].index = MSR_MTRRfix4K_D8000;
        msrs[n++].index = MSR_MTRRfix4K_E0000;
        msrs[n++].index = MSR_MTRRfix4K_E8000;
        msrs[n++].index = MSR_MTRRfix4K_F0000;
        msrs[n++].index = MSR_MTRRfix4K_F8000;
        for (i = 0; i < MSR_MTRRcap_VCNT; i++) {
            msrs[n++].index = MSR_MTRRphysBase(i);
            msrs[n++].index = MSR_MTRRphysMask(i);
        }
    }

    msr_data.info = (struct kvm_msrs) {
        .nmsrs = n,
    };

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
    if (ret < 0) {
        return ret;
    }

    for (i = 0; i < ret; i++) {
        uint32_t index = msrs[i].index;
        switch (index) {
        case MSR_IA32_SYSENTER_CS:
            env->sysenter_cs = msrs[i].data;
            break;
        case MSR_IA32_SYSENTER_ESP:
            env->sysenter_esp = msrs[i].data;
            break;
        case MSR_IA32_SYSENTER_EIP:
            env->sysenter_eip = msrs[i].data;
            break;
        case MSR_PAT:
            env->pat = msrs[i].data;
            break;
        case MSR_STAR:
            env->star = msrs[i].data;
            break;
#ifdef TARGET_X86_64
        case MSR_CSTAR:
            env->cstar = msrs[i].data;
            break;
        case MSR_KERNELGSBASE:
            env->kernelgsbase = msrs[i].data;
            break;
        case MSR_FMASK:
            env->fmask = msrs[i].data;
            break;
        case MSR_LSTAR:
            env->lstar = msrs[i].data;
            break;
#endif
        case MSR_IA32_TSC:
            env->tsc = msrs[i].data;
            break;
        case MSR_TSC_AUX:
            env->tsc_aux = msrs[i].data;
            break;
        case MSR_TSC_ADJUST:
            env->tsc_adjust = msrs[i].data;
            break;
        case MSR_IA32_TSCDEADLINE:
            env->tsc_deadline = msrs[i].data;
            break;
        case MSR_VM_HSAVE_PA:
            env->vm_hsave = msrs[i].data;
            break;
        case MSR_KVM_SYSTEM_TIME:
            env->system_time_msr = msrs[i].data;
            break;
        case MSR_KVM_WALL_CLOCK:
            env->wall_clock_msr = msrs[i].data;
            break;
        case MSR_MCG_STATUS:
            env->mcg_status = msrs[i].data;
            break;
        case MSR_MCG_CTL:
            env->mcg_ctl = msrs[i].data;
            break;
        case MSR_IA32_MISC_ENABLE:
            env->msr_ia32_misc_enable = msrs[i].data;
            break;
        case MSR_IA32_SMBASE:
            env->smbase = msrs[i].data;
            break;
        case MSR_IA32_FEATURE_CONTROL:
            env->msr_ia32_feature_control = msrs[i].data;
            break;
        case MSR_IA32_BNDCFGS:
            env->msr_bndcfgs = msrs[i].data;
            break;
        case MSR_IA32_XSS:
            env->xss = msrs[i].data;
            break;
        default:
            if (msrs[i].index >= MSR_MC0_CTL &&
                msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) {
                env->mce_banks[msrs[i].index - MSR_MC0_CTL] = msrs[i].data;
            }
            break;
        case MSR_KVM_ASYNC_PF_EN:
            env->async_pf_en_msr = msrs[i].data;
            break;
        case MSR_KVM_PV_EOI_EN:
            env->pv_eoi_en_msr = msrs[i].data;
            break;
        case MSR_KVM_STEAL_TIME:
            env->steal_time_msr = msrs[i].data;
            break;
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
            env->msr_fixed_ctr_ctrl = msrs[i].data;
            break;
        case MSR_CORE_PERF_GLOBAL_CTRL:
            env->msr_global_ctrl = msrs[i].data;
            break;
        case MSR_CORE_PERF_GLOBAL_STATUS:
            env->msr_global_status = msrs[i].data;
            break;
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
            env->msr_global_ovf_ctrl = msrs[i].data;
            break;
        case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTR0 + MAX_FIXED_COUNTERS - 1:
            env->msr_fixed_counters[index - MSR_CORE_PERF_FIXED_CTR0] = msrs[i].data;
            break;
        case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR0 + MAX_GP_COUNTERS - 1:
            env->msr_gp_counters[index - MSR_P6_PERFCTR0] = msrs[i].data;
            break;
        case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL0 + MAX_GP_COUNTERS - 1:
            env->msr_gp_evtsel[index - MSR_P6_EVNTSEL0] = msrs[i].data;
            break;
        case HV_X64_MSR_HYPERCALL:
            env->msr_hv_hypercall = msrs[i].data;
            break;
        case HV_X64_MSR_GUEST_OS_ID:
            env->msr_hv_guest_os_id = msrs[i].data;
            break;
        case HV_X64_MSR_APIC_ASSIST_PAGE:
            env->msr_hv_vapic = msrs[i].data;
            break;
        case HV_X64_MSR_REFERENCE_TSC:
            env->msr_hv_tsc = msrs[i].data;
            break;
        case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
            env->msr_hv_crash_params[index - HV_X64_MSR_CRASH_P0] = msrs[i].data;
            break;
        case MSR_MTRRdefType:
            env->mtrr_deftype = msrs[i].data;
            break;
        case MSR_MTRRfix64K_00000:
            env->mtrr_fixed[0] = msrs[i].data;
            break;
        case MSR_MTRRfix16K_80000:
            env->mtrr_fixed[1] = msrs[i].data;
            break;
        case MSR_MTRRfix16K_A0000:
            env->mtrr_fixed[2] = msrs[i].data;
            break;
        case MSR_MTRRfix4K_C0000:
            env->mtrr_fixed[3] = msrs[i].data;
            break;
        case MSR_MTRRfix4K_C8000:
            env->mtrr_fixed[4] = msrs[i].data;
            break;
        case MSR_MTRRfix4K_D0000:
            env->mtrr_fixed[5] = msrs[i].data;
            break;
        case MSR_MTRRfix4K_D8000:
            env->mtrr_fixed[6] = msrs[i].data;
            break;
        case MSR_MTRRfix4K_E0000:
            env->mtrr_fixed[7] = msrs[i].data;
            break;
        case MSR_MTRRfix4K_E8000:
            env->mtrr_fixed[8] = msrs[i].data;
            break;
        case MSR_MTRRfix4K_F0000:
            env->mtrr_fixed[9] = msrs[i].data;
            break;
        case MSR_MTRRfix4K_F8000:
            env->mtrr_fixed[10] = msrs[i].data;
            break;
        case MSR_MTRRphysBase(0) ... MSR_MTRRphysMask(MSR_MTRRcap_VCNT - 1):
            if (index & 1) {
                env->mtrr_var[MSR_MTRRphysIndex(index)].mask = msrs[i].data;
            } else {
                env->mtrr_var[MSR_MTRRphysIndex(index)].base = msrs[i].data;
            }
            break;
        }
    }

    return 0;
}

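/*
 * vCPU run state (runnable, halted, waiting for SIPI, ...) as seen by
 * KVM.  With an in-kernel irqchip, HLT is handled entirely inside the
 * kernel, so cs->halted must be derived from the KVM MP state rather
 * than the other way around.
 */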
static int kvm_put_mp_state(X86CPU *cpu)
{
    struct kvm_mp_state mp_state = { .mp_state = cpu->env.mp_state };

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
}

static int kvm_get_mp_state(X86CPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUX86State *env = &cpu->env;
    struct kvm_mp_state mp_state;
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_MP_STATE, &mp_state);
    if (ret < 0) {
        return ret;
    }
    env->mp_state = mp_state.mp_state;
    if (kvm_irqchip_in_kernel()) {
        cs->halted = (mp_state.mp_state == KVM_MP_STATE_HALTED);
    }
    return 0;
}

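/*
 * The local APIC registers only need to be synchronized here when the
 * APIC is emulated in the kernel; with a userspace APIC, QEMU's device
 * model is authoritative and there is nothing to fetch or push.
 */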
static int kvm_get_apic(X86CPU *cpu)
{
    DeviceState *apic = cpu->apic_state;
    struct kvm_lapic_state kapic;
    int ret;

    if (apic && kvm_irqchip_in_kernel()) {
        ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_LAPIC, &kapic);
        if (ret < 0) {
            return ret;
        }

        kvm_get_apic_state(apic, &kapic);
    }
    return 0;
}

static int kvm_put_apic(X86CPU *cpu)
{
    DeviceState *apic = cpu->apic_state;
    struct kvm_lapic_state kapic;

    if (apic && kvm_irqchip_in_kernel()) {
        kvm_put_apic_state(apic, &kapic);

        return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_LAPIC, &kapic);
    }
    return 0;
}

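/*
 * Push interrupt/exception/NMI/SMI event state into the kernel.  The
 * 'level' argument encodes how much state to force: a plain runtime
 * synchronization leaves the kernel's NMI-pending and SIPI vector
 * fields alone, while KVM_PUT_RESET_STATE and above mark them valid so
 * that a reset or a full state load overwrites them.
 */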
static int kvm_put_vcpu_events(X86CPU *cpu, int level)
{
    CPUState *cs = CPU(cpu);
    CPUX86State *env = &cpu->env;
    struct kvm_vcpu_events events = {};

    if (!kvm_has_vcpu_events()) {
        return 0;
    }

    events.exception.injected = (env->exception_injected >= 0);
    events.exception.nr = env->exception_injected;
    events.exception.has_error_code = env->has_error_code;
    events.exception.error_code = env->error_code;
    events.exception.pad = 0;

    events.interrupt.injected = (env->interrupt_injected >= 0);
    events.interrupt.nr = env->interrupt_injected;
    events.interrupt.soft = env->soft_interrupt;

    events.nmi.injected = env->nmi_injected;
    events.nmi.pending = env->nmi_pending;
    events.nmi.masked = !!(env->hflags2 & HF2_NMI_MASK);
    events.nmi.pad = 0;

    events.sipi_vector = env->sipi_vector;

    if (has_msr_smbase) {
        events.smi.smm = !!(env->hflags & HF_SMM_MASK);
        events.smi.smm_inside_nmi = !!(env->hflags2 & HF2_SMM_INSIDE_NMI_MASK);
        if (kvm_irqchip_in_kernel()) {
            /* As soon as these are moved to the kernel, remove them
             * from cs->interrupt_request.
             */
            events.smi.pending = cs->interrupt_request & CPU_INTERRUPT_SMI;
            events.smi.latched_init = cs->interrupt_request & CPU_INTERRUPT_INIT;
            cs->interrupt_request &= ~(CPU_INTERRUPT_INIT | CPU_INTERRUPT_SMI);
        } else {
            /* Keep these in cs->interrupt_request. */
            events.smi.pending = 0;
            events.smi.latched_init = 0;
        }
        events.flags |= KVM_VCPUEVENT_VALID_SMM;
    }

    /* events.flags starts out zeroed by the initializer above; zeroing it
     * here would discard KVM_VCPUEVENT_VALID_SMM again. */
    if (level >= KVM_PUT_RESET_STATE) {
        events.flags |=
            KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR;
    }

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events);
}

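/*
 * Mirror image of kvm_put_vcpu_events: SMM state comes back through
 * events.smi and is folded into env->hflags/hflags2, while a pending
 * SMI or latched INIT is re-raised as an interrupt_request bit so the
 * userspace side of the machinery sees it again.
 */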
static int kvm_get_vcpu_events(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_vcpu_events events;
    int ret;

    if (!kvm_has_vcpu_events()) {
        return 0;
    }

    memset(&events, 0, sizeof(events));
    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_VCPU_EVENTS, &events);
    if (ret < 0) {
        return ret;
    }
    env->exception_injected =
        events.exception.injected ? events.exception.nr : -1;
    env->has_error_code = events.exception.has_error_code;
    env->error_code = events.exception.error_code;

    env->interrupt_injected =
        events.interrupt.injected ? events.interrupt.nr : -1;
    env->soft_interrupt = events.interrupt.soft;

    env->nmi_injected = events.nmi.injected;
    env->nmi_pending = events.nmi.pending;
    if (events.nmi.masked) {
        env->hflags2 |= HF2_NMI_MASK;
    } else {
        env->hflags2 &= ~HF2_NMI_MASK;
    }

    if (events.flags & KVM_VCPUEVENT_VALID_SMM) {
        if (events.smi.smm) {
            env->hflags |= HF_SMM_MASK;
        } else {
            env->hflags &= ~HF_SMM_MASK;
        }
        if (events.smi.pending) {
            cpu_interrupt(CPU(cpu), CPU_INTERRUPT_SMI);
        } else {
            cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_SMI);
        }
        if (events.smi.smm_inside_nmi) {
            env->hflags2 |= HF2_SMM_INSIDE_NMI_MASK;
        } else {
            env->hflags2 &= ~HF2_SMM_INSIDE_NMI_MASK;
        }
        if (events.smi.latched_init) {
            cpu_interrupt(CPU(cpu), CPU_INTERRUPT_INIT);
        } else {
            cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_INIT);
        }
    }

    env->sipi_vector = events.sipi_vector;

    return 0;
}

static int kvm_guest_debug_workarounds(X86CPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUX86State *env = &cpu->env;
    int ret = 0;
    unsigned long reinject_trap = 0;

    if (!kvm_has_vcpu_events()) {
        if (env->exception_injected == 1) {
            reinject_trap = KVM_GUESTDBG_INJECT_DB;
        } else if (env->exception_injected == 3) {
            reinject_trap = KVM_GUESTDBG_INJECT_BP;
        }
        env->exception_injected = -1;
    }

    /*
     * Kernels before KVM_CAP_X86_ROBUST_SINGLESTEP overwrote flags.TF
     * injected via SET_GUEST_DEBUG while updating GP regs. Work around this
     * by updating the debug state once again if single-stepping is on.
     * Another reason to call kvm_update_guest_debug here is a pending debug
     * trap raised by the guest. On kernels without SET_VCPU_EVENTS we have to
     * reinject them via SET_GUEST_DEBUG.
     */
    if (reinject_trap ||
        (!kvm_has_robust_singlestep() && cs->singlestep_enabled)) {
        ret = kvm_update_guest_debug(cs, reinject_trap);
    }
    return ret;
}

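/*
 * The four address registers DR0-DR3 plus status (DR6) and control
 * (DR7) are transferred as one block.  DR4/DR5 are legacy aliases of
 * DR6/DR7 and are not part of the kernel interface, so they are simply
 * kept in sync with their counterparts on the read-back path.
 */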
static int kvm_put_debugregs(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_debugregs dbgregs;
    int i;

    if (!kvm_has_debugregs()) {
        return 0;
    }

    for (i = 0; i < 4; i++) {
        dbgregs.db[i] = env->dr[i];
    }
    dbgregs.dr6 = env->dr[6];
    dbgregs.dr7 = env->dr[7];
    dbgregs.flags = 0;

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_DEBUGREGS, &dbgregs);
}

static int kvm_get_debugregs(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_debugregs dbgregs;
    int i, ret;

    if (!kvm_has_debugregs()) {
        return 0;
    }

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_DEBUGREGS, &dbgregs);
    if (ret < 0) {
        return ret;
    }
    for (i = 0; i < 4; i++) {
        env->dr[i] = dbgregs.db[i];
    }
    env->dr[4] = env->dr[6] = dbgregs.dr6;
    env->dr[5] = env->dr[7] = dbgregs.dr7;

    return 0;
}

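/*
 * Write the complete CPU state into the kernel.  The ordering below is
 * deliberate: on reset and full-state loads MSR_IA32_FEATURE_CONTROL
 * goes first, old-style MCE injection must precede kvm_put_msrs(), and
 * the guest-debug workarounds have to run last, after everything they
 * may need to inspect has been written.
 */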
int kvm_arch_put_registers(CPUState *cpu, int level)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    int ret;

    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    if (level >= KVM_PUT_RESET_STATE && has_msr_feature_control) {
        ret = kvm_put_msr_feature_control(x86_cpu);
        if (ret < 0) {
            return ret;
        }
    }

    ret = kvm_getput_regs(x86_cpu, 1);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_put_xsave(x86_cpu);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_put_xcrs(x86_cpu);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_put_sregs(x86_cpu);
    if (ret < 0) {
        return ret;
    }
    /* must be before kvm_put_msrs */
    ret = kvm_inject_mce_oldstyle(x86_cpu);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_put_msrs(x86_cpu, level);
    if (ret < 0) {
        return ret;
    }
    if (level >= KVM_PUT_RESET_STATE) {
        ret = kvm_put_mp_state(x86_cpu);
        if (ret < 0) {
            return ret;
        }
        ret = kvm_put_apic(x86_cpu);
        if (ret < 0) {
            return ret;
        }
    }

    ret = kvm_put_tscdeadline_msr(x86_cpu);
    if (ret < 0) {
        return ret;
    }

    ret = kvm_put_vcpu_events(x86_cpu, level);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_put_debugregs(x86_cpu);
    if (ret < 0) {
        return ret;
    }
    /* must be last */
    ret = kvm_guest_debug_workarounds(x86_cpu);
    if (ret < 0) {
        return ret;
    }
    return 0;
}

int kvm_arch_get_registers(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    int ret;

    assert(cpu_is_stopped(cs) || qemu_cpu_is_self(cs));

    ret = kvm_getput_regs(cpu, 0);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_get_xsave(cpu);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_get_xcrs(cpu);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_get_sregs(cpu);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_get_msrs(cpu);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_get_mp_state(cpu);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_get_apic(cpu);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_get_vcpu_events(cpu);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_get_debugregs(cpu);
    if (ret < 0) {
        return ret;
    }
    return 0;
}

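/*
 * Runs on the vCPU thread right before reentering KVM_RUN.  NMI and
 * SMI are injected through their dedicated ioctls; with a userspace
 * irqchip, ordinary interrupts are injected here via KVM_INTERRUPT
 * when the guest can accept them, and otherwise an interrupt-window
 * exit is requested so we return to userspace as soon as injection
 * becomes possible.
 */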
void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    int ret;

    /* Inject NMI */
    if (cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) {
        if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
            qemu_mutex_lock_iothread();
            cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
            qemu_mutex_unlock_iothread();
            DPRINTF("injected NMI\n");
            ret = kvm_vcpu_ioctl(cpu, KVM_NMI);
            if (ret < 0) {
                fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n",
                        strerror(-ret));
            }
        }
        if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
            qemu_mutex_lock_iothread();
            cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
            qemu_mutex_unlock_iothread();
            DPRINTF("injected SMI\n");
            ret = kvm_vcpu_ioctl(cpu, KVM_SMI);
            if (ret < 0) {
                fprintf(stderr, "KVM: injection failed, SMI lost (%s)\n",
                        strerror(-ret));
            }
        }
    }

    if (!kvm_irqchip_in_kernel()) {
        qemu_mutex_lock_iothread();
    }

    /* Force the VCPU out of its inner loop to process any INIT requests
     * or (for userspace APIC, but it is cheap to combine the checks here)
     * pending TPR access reports.
     */
    if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
        if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
            !(env->hflags & HF_SMM_MASK)) {
            cpu->exit_request = 1;
        }
        if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
            cpu->exit_request = 1;
        }
    }

    if (!kvm_irqchip_in_kernel()) {
        /* Try to inject an interrupt if the guest can accept it */
        if (run->ready_for_interrupt_injection &&
            (cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
            (env->eflags & IF_MASK)) {
            int irq;

            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
            irq = cpu_get_pic_interrupt(env);
            if (irq >= 0) {
                struct kvm_interrupt intr;

                intr.irq = irq;
                DPRINTF("injected interrupt %d\n", irq);
                ret = kvm_vcpu_ioctl(cpu, KVM_INTERRUPT, &intr);
                if (ret < 0) {
                    fprintf(stderr,
                            "KVM: injection failed, interrupt lost (%s)\n",
                            strerror(-ret));
                }
            }
        }

        /* If we have an interrupt but the guest is not ready to receive an
         * interrupt, request an interrupt window exit.  This will
         * cause a return to userspace as soon as the guest is ready to
         * receive interrupts. */
        if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
            run->request_interrupt_window = 1;
        } else {
            run->request_interrupt_window = 0;
        }

        DPRINTF("setting tpr\n");
        run->cr8 = cpu_get_apic_tpr(x86_cpu->apic_state);

        qemu_mutex_unlock_iothread();
    }
}

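/*
 * Runs on the vCPU thread right after KVM_RUN returns: fold the
 * kernel's view of SMM mode, IF, CR8/TPR and the APIC base back into
 * QEMU's CPU state before anything else looks at it.
 */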
MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;

    if (run->flags & KVM_RUN_X86_SMM) {
        env->hflags |= HF_SMM_MASK;
    } else {
        env->hflags &= ~HF_SMM_MASK;
    }
    if (run->if_flag) {
        env->eflags |= IF_MASK;
    } else {
        env->eflags &= ~IF_MASK;
    }

    /* We need to protect the apic state against concurrent accesses from
     * different threads in case the userspace irqchip is used. */
    if (!kvm_irqchip_in_kernel()) {
        qemu_mutex_lock_iothread();
    }
    cpu_set_apic_tpr(x86_cpu->apic_state, run->cr8);
    cpu_set_apic_base(x86_cpu->apic_state, run->apic_base);
    if (!kvm_irqchip_in_kernel()) {
        qemu_mutex_unlock_iothread();
    }
    return cpu_get_mem_attrs(env);
}

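/*
 * Handle events that were signalled on the vCPU outside of KVM_RUN:
 * MCE delivery, INIT/SIPI sequencing and, for the userspace APIC,
 * pending poll and TPR-access work.  Returns nonzero while the CPU
 * should remain halted.
 */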
int kvm_arch_process_async_events(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    if (cs->interrupt_request & CPU_INTERRUPT_MCE) {
        /* We must not raise CPU_INTERRUPT_MCE if it's not supported. */
        assert(env->mcg_cap);

        cs->interrupt_request &= ~CPU_INTERRUPT_MCE;

        kvm_cpu_synchronize_state(cs);

        if (env->exception_injected == EXCP08_DBLE) {
            /* this means triple fault */
            qemu_system_reset_request();
            cs->exit_request = 1;
            return 0;
        }
        env->exception_injected = EXCP12_MCHK;
        env->has_error_code = 0;

        cs->halted = 0;
        if (kvm_irqchip_in_kernel() && env->mp_state == KVM_MP_STATE_HALTED) {
            env->mp_state = KVM_MP_STATE_RUNNABLE;
        }
    }

    if ((cs->interrupt_request & CPU_INTERRUPT_INIT) &&
        !(env->hflags & HF_SMM_MASK)) {
        kvm_cpu_synchronize_state(cs);
        do_cpu_init(cpu);
    }

    if (kvm_irqchip_in_kernel()) {
        return 0;
    }

    if (cs->interrupt_request & CPU_INTERRUPT_POLL) {
        cs->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(cpu->apic_state);
    }
    if (((cs->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cs->interrupt_request & CPU_INTERRUPT_NMI)) {
        cs->halted = 0;
    }
    if (cs->interrupt_request & CPU_INTERRUPT_SIPI) {
        kvm_cpu_synchronize_state(cs);
        do_cpu_sipi(cpu);
    }
    if (cs->interrupt_request & CPU_INTERRUPT_TPR) {
        cs->interrupt_request &= ~CPU_INTERRUPT_TPR;
        kvm_cpu_synchronize_state(cs);
        apic_handle_tpr_access_report(cpu->apic_state, env->eip,
                                      env->tpr_access_type);
    }

    return cs->halted;
}

static int kvm_handle_halt(X86CPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUX86State *env = &cpu->env;

    if (!((cs->interrupt_request & CPU_INTERRUPT_HARD) &&
          (env->eflags & IF_MASK)) &&
        !(cs->interrupt_request & CPU_INTERRUPT_NMI)) {
        cs->halted = 1;
        return EXCP_HLT;
    }

    return 0;
}

static int kvm_handle_tpr_access(X86CPU *cpu)
{
    CPUState *cs = CPU(cpu);
    struct kvm_run *run = cs->kvm_run;

    apic_handle_tpr_access_report(cpu->apic_state, run->tpr_access.rip,
                                  run->tpr_access.is_write ? TPR_ACCESS_WRITE
                                                           : TPR_ACCESS_READ);
    return 1;
}

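/*
 * Software breakpoints are implemented by patching an int3 (0xcc) over
 * the first instruction byte; the original byte is kept in
 * bp->saved_insn so that removal can verify the site is still intact
 * before restoring it.
 */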
int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    static const uint8_t int3 = 0xcc;

    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) ||
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&int3, 1, 1)) {
        return -EINVAL;
    }
    return 0;
}

int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    uint8_t int3;

    if (cpu_memory_rw_debug(cs, bp->pc, &int3, 1, 0) || int3 != 0xcc ||
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) {
        return -EINVAL;
    }
    return 0;
}

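/*
 * x86 provides exactly four hardware breakpoint/watchpoint slots
 * (DR0-DR3), so the bookkeeping is a fixed-size table.  Watchpoint
 * addresses must be naturally aligned to their 1/2/4/8-byte length,
 * which is what the insert path checks below.
 */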
static struct {
    target_ulong addr;
    int len;
    int type;
} hw_breakpoint[4];

static int nb_hw_breakpoint;

static int find_hw_breakpoint(target_ulong addr, int len, int type)
{
    int n;

    for (n = 0; n < nb_hw_breakpoint; n++) {
        if (hw_breakpoint[n].addr == addr && hw_breakpoint[n].type == type &&
            (hw_breakpoint[n].len == len || len == -1)) {
            return n;
        }
    }
    return -1;
}

int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    switch (type) {
    case GDB_BREAKPOINT_HW:
        len = 1;
        break;
    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_ACCESS:
        switch (len) {
        case 1:
            break;
        case 2:
        case 4:
        case 8:
            if (addr & (len - 1)) {
                return -EINVAL;
            }
            break;
        default:
            return -EINVAL;
        }
        break;
    default:
        return -ENOSYS;
    }

    if (nb_hw_breakpoint == 4) {
        return -ENOBUFS;
    }
    if (find_hw_breakpoint(addr, len, type) >= 0) {
        return -EEXIST;
    }
    hw_breakpoint[nb_hw_breakpoint].addr = addr;
    hw_breakpoint[nb_hw_breakpoint].len = len;
    hw_breakpoint[nb_hw_breakpoint].type = type;
    nb_hw_breakpoint++;

    return 0;
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    int n;

    n = find_hw_breakpoint(addr, (type == GDB_BREAKPOINT_HW) ? 1 : len, type);
    if (n < 0) {
        return -ENOENT;
    }
    nb_hw_breakpoint--;
    hw_breakpoint[n] = hw_breakpoint[nb_hw_breakpoint];

    return 0;
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
    nb_hw_breakpoint = 0;
}

static CPUWatchpoint hw_watchpoint;

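/*
 * Decode a KVM_EXIT_DEBUG for exception 1 (#DB): DR6 bit 14 (BS) flags
 * a completed single step, while bits 0-3 identify which of the four
 * hardware slots triggered; the matching R/W field in DR7 tells
 * execute breakpoints (00b) apart from write (01b) and access (11b)
 * watchpoints.  Anything we did not arm ourselves is passed back to
 * the guest as an injected exception.
 */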
static int kvm_handle_debug(X86CPU *cpu,
                            struct kvm_debug_exit_arch *arch_info)
{
    CPUState *cs = CPU(cpu);
    CPUX86State *env = &cpu->env;
    int ret = 0;
    int n;

    if (arch_info->exception == 1) {
        if (arch_info->dr6 & (1 << 14)) {
            if (cs->singlestep_enabled) {
                ret = EXCP_DEBUG;
            }
        } else {
            for (n = 0; n < 4; n++) {
                if (arch_info->dr6 & (1 << n)) {
                    switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) {
                    case 0x0:
                        ret = EXCP_DEBUG;
                        break;
                    case 0x1:
                        ret = EXCP_DEBUG;
                        cs->watchpoint_hit = &hw_watchpoint;
                        hw_watchpoint.vaddr = hw_breakpoint[n].addr;
                        hw_watchpoint.flags = BP_MEM_WRITE;
                        break;
                    case 0x3:
                        ret = EXCP_DEBUG;
                        cs->watchpoint_hit = &hw_watchpoint;
                        hw_watchpoint.vaddr = hw_breakpoint[n].addr;
                        hw_watchpoint.flags = BP_MEM_ACCESS;
                        break;
                    }
                }
            }
        }
    } else if (kvm_find_sw_breakpoint(cs, arch_info->pc)) {
        ret = EXCP_DEBUG;
    }
    if (ret == 0) {
        cpu_synchronize_state(cs);
        assert(env->exception_injected == -1);

        /* pass to guest */
        env->exception_injected = arch_info->exception;
        env->has_error_code = 0;
    }

    return ret;
}

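/*
 * Build the KVM_SET_GUEST_DEBUG payload.  For each armed slot n, DR7
 * gets its global-enable bit (bit 2n+1, hence the '2 << (n * 2)'), the
 * R/W condition at bits 16+4n and the length encoding at bits 18+4n;
 * the base value 0x0600 sets GE plus the always-one reserved bit 10.
 * A 4-byte write watchpoint in slot 0, for example, ends up as 01b at
 * bit 16 and 11b at bit 18.
 */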
void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
{
    const uint8_t type_code[] = {
        [GDB_BREAKPOINT_HW] = 0x0,
        [GDB_WATCHPOINT_WRITE] = 0x1,
        [GDB_WATCHPOINT_ACCESS] = 0x3
    };
    const uint8_t len_code[] = {
        [1] = 0x0, [2] = 0x1, [4] = 0x3, [8] = 0x2
    };
    int n;

    if (kvm_sw_breakpoints_active(cpu)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }
    if (nb_hw_breakpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        dbg->arch.debugreg[7] = 0x0600;
        for (n = 0; n < nb_hw_breakpoint; n++) {
            dbg->arch.debugreg[n] = hw_breakpoint[n].addr;
            dbg->arch.debugreg[7] |= (2 << (n * 2)) |
                (type_code[hw_breakpoint[n].type] << (16 + n*4)) |
                ((uint32_t)len_code[hw_breakpoint[n].len] << (18 + n*4));
        }
    }
}

static bool host_supports_vmx(void)
{
    uint32_t ecx, unused;

    host_cpuid(1, 0, &unused, &unused, &ecx, &unused);
    return ecx & CPUID_EXT_VMX;
}

#define VMX_INVALID_GUEST_STATE 0x80000021

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    X86CPU *cpu = X86_CPU(cs);
    uint64_t code;
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_HLT:
        DPRINTF("handle_hlt\n");
        qemu_mutex_lock_iothread();
        ret = kvm_handle_halt(cpu);
        qemu_mutex_unlock_iothread();
        break;
    case KVM_EXIT_SET_TPR:
        ret = 0;
        break;
    case KVM_EXIT_TPR_ACCESS:
        qemu_mutex_lock_iothread();
        ret = kvm_handle_tpr_access(cpu);
        qemu_mutex_unlock_iothread();
        break;
    case KVM_EXIT_FAIL_ENTRY:
        code = run->fail_entry.hardware_entry_failure_reason;
        fprintf(stderr, "KVM: entry failed, hardware error 0x%" PRIx64 "\n",
                code);
        if (host_supports_vmx() && code == VMX_INVALID_GUEST_STATE) {
            fprintf(stderr,
                    "\nIf you're running a guest on an Intel machine without "
                    "unrestricted mode\n"
                    "support, the failure is most likely due to the guest "
                    "entering an invalid\n"
                    "state for Intel VT. For example, the guest may be running "
                    "in big real mode\n"
                    "which is not supported on less recent Intel processors."
                    "\n\n");
        }
        ret = -1;
        break;
    case KVM_EXIT_EXCEPTION:
        fprintf(stderr, "KVM: exception %d exit (error code 0x%x)\n",
                run->ex.exception, run->ex.error_code);
        ret = -1;
        break;
    case KVM_EXIT_DEBUG:
        DPRINTF("kvm_exit_debug\n");
        qemu_mutex_lock_iothread();
        ret = kvm_handle_debug(cpu, &run->debug.arch);
        qemu_mutex_unlock_iothread();
        break;
    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}

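/*
 * On an internal emulation failure, only stop hard when the guest was
 * outside protected mode or outside ring 3; a fault in guest user code
 * is presumably something the guest OS itself can deal with.
 */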
bool kvm_arch_stop_on_emulation_error(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    kvm_cpu_synchronize_state(cs);
    return !(env->cr[0] & CR0_PE_MASK) ||
           ((env->segs[R_CS].selector & 3) != 3);
}

void kvm_arch_init_irq_routing(KVMState *s)
{
    if (!kvm_check_extension(s, KVM_CAP_IRQ_ROUTING)) {
        /* If the kernel can't do IRQ routing, the interrupt source
         * override 0->2 cannot be set up as required by HPET.
         * So we have to disable it.
         */
        no_hpet = 1;
    }
    /* We know at this point that we're using the in-kernel
     * irqchip, so we can use irqfds, and on x86 we know
     * we can use msi via irqfd and GSI routing.
     */
    kvm_msi_via_irqfd_allowed = true;
    kvm_gsi_routing_allowed = true;
}

/* Classic KVM device assignment interface. Will remain x86 only. */
int kvm_device_pci_assign(KVMState *s, PCIHostDeviceAddress *dev_addr,
                          uint32_t flags, uint32_t *dev_id)
{
    struct kvm_assigned_pci_dev dev_data = {
        .segnr = dev_addr->domain,
        .busnr = dev_addr->bus,
        .devfn = PCI_DEVFN(dev_addr->slot, dev_addr->function),
        .flags = flags,
    };
    int ret;

    dev_data.assigned_dev_id =
        (dev_addr->domain << 16) | (dev_addr->bus << 8) | dev_data.devfn;

    ret = kvm_vm_ioctl(s, KVM_ASSIGN_PCI_DEVICE, &dev_data);
    if (ret < 0) {
        return ret;
    }

    *dev_id = dev_data.assigned_dev_id;

    return 0;
}

int kvm_device_pci_deassign(KVMState *s, uint32_t dev_id)
{
    struct kvm_assigned_pci_dev dev_data = {
        .assigned_dev_id = dev_id,
    };

    return kvm_vm_ioctl(s, KVM_DEASSIGN_PCI_DEVICE, &dev_data);
}

static int kvm_assign_irq_internal(KVMState *s, uint32_t dev_id,
                                   uint32_t irq_type, uint32_t guest_irq)
{
    struct kvm_assigned_irq assigned_irq = {
        .assigned_dev_id = dev_id,
        .guest_irq = guest_irq,
        .flags = irq_type,
    };

    if (kvm_check_extension(s, KVM_CAP_ASSIGN_DEV_IRQ)) {
        return kvm_vm_ioctl(s, KVM_ASSIGN_DEV_IRQ, &assigned_irq);
    } else {
        return kvm_vm_ioctl(s, KVM_ASSIGN_IRQ, &assigned_irq);
    }
}

int kvm_device_intx_assign(KVMState *s, uint32_t dev_id, bool use_host_msi,
                           uint32_t guest_irq)
{
    uint32_t irq_type = KVM_DEV_IRQ_GUEST_INTX |
        (use_host_msi ? KVM_DEV_IRQ_HOST_MSI : KVM_DEV_IRQ_HOST_INTX);

    return kvm_assign_irq_internal(s, dev_id, irq_type, guest_irq);
}

int kvm_device_intx_set_mask(KVMState *s, uint32_t dev_id, bool masked)
{
    struct kvm_assigned_pci_dev dev_data = {
        .assigned_dev_id = dev_id,
        .flags = masked ? KVM_DEV_ASSIGN_MASK_INTX : 0,
    };

    return kvm_vm_ioctl(s, KVM_ASSIGN_SET_INTX_MASK, &dev_data);
}

static int kvm_deassign_irq_internal(KVMState *s, uint32_t dev_id,
                                     uint32_t type)
{
    struct kvm_assigned_irq assigned_irq = {
        .assigned_dev_id = dev_id,
        .flags = type,
    };

    return kvm_vm_ioctl(s, KVM_DEASSIGN_DEV_IRQ, &assigned_irq);
}

int kvm_device_intx_deassign(KVMState *s, uint32_t dev_id, bool use_host_msi)
{
    return kvm_deassign_irq_internal(s, dev_id, KVM_DEV_IRQ_GUEST_INTX |
        (use_host_msi ? KVM_DEV_IRQ_HOST_MSI : KVM_DEV_IRQ_HOST_INTX));
}

int kvm_device_msi_assign(KVMState *s, uint32_t dev_id, int virq)
{
    return kvm_assign_irq_internal(s, dev_id, KVM_DEV_IRQ_HOST_MSI |
                                              KVM_DEV_IRQ_GUEST_MSI, virq);
}

int kvm_device_msi_deassign(KVMState *s, uint32_t dev_id)
{
    return kvm_deassign_irq_internal(s, dev_id, KVM_DEV_IRQ_GUEST_MSI |
                                                KVM_DEV_IRQ_HOST_MSI);
}

bool kvm_device_msix_supported(KVMState *s)
{
    /* The kernel lacks a corresponding KVM_CAP, so we probe by calling
     * KVM_ASSIGN_SET_MSIX_NR with an invalid parameter. */
    return kvm_vm_ioctl(s, KVM_ASSIGN_SET_MSIX_NR, NULL) == -EFAULT;
}

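/*
 * MSI-X setup for an assigned device goes through three steps, in the
 * order the helpers below appear: reserve the vector count with
 * KVM_ASSIGN_SET_MSIX_NR, bind each vector to a GSI with
 * KVM_ASSIGN_SET_MSIX_ENTRY, and finally attach the IRQ with
 * kvm_device_msix_assign().
 */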
int kvm_device_msix_init_vectors(KVMState *s, uint32_t dev_id,
                                 uint32_t nr_vectors)
{
    struct kvm_assigned_msix_nr msix_nr = {
        .assigned_dev_id = dev_id,
        .entry_nr = nr_vectors,
    };

    return kvm_vm_ioctl(s, KVM_ASSIGN_SET_MSIX_NR, &msix_nr);
}

int kvm_device_msix_set_vector(KVMState *s, uint32_t dev_id, uint32_t vector,
                               int virq)
{
    struct kvm_assigned_msix_entry msix_entry = {
        .assigned_dev_id = dev_id,
        .gsi = virq,
        .entry = vector,
    };

    return kvm_vm_ioctl(s, KVM_ASSIGN_SET_MSIX_ENTRY, &msix_entry);
}

int kvm_device_msix_assign(KVMState *s, uint32_t dev_id)
{
    return kvm_assign_irq_internal(s, dev_id, KVM_DEV_IRQ_HOST_MSIX |
                                              KVM_DEV_IRQ_GUEST_MSIX, 0);
}

int kvm_device_msix_deassign(KVMState *s, uint32_t dev_id)
{
    return kvm_deassign_irq_internal(s, dev_id, KVM_DEV_IRQ_GUEST_MSIX |
                                                KVM_DEV_IRQ_HOST_MSIX);
}

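/*
 * x86 does not need to rewrite MSI routes, so the fixup hook is a
 * no-op; kvm_arch_msi_data_to_gsi() aborts because data-to-GSI
 * translation is never used on this architecture.
 */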
int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data)
{
    return 0;
}

int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    abort();
}