git.proxmox.com Git - qemu.git/blame - target-i386/kvm.c
Add RAM -> physical addr mapping in MCE simulation
05330448
AL
1/*
2 * QEMU KVM support
3 *
4 * Copyright (C) 2006-2008 Qumranet Technologies
5 * Copyright IBM, Corp. 2008
6 *
7 * Authors:
8 * Anthony Liguori <aliguori@us.ibm.com>
9 *
10 * This work is licensed under the terms of the GNU GPL, version 2 or later.
11 * See the COPYING file in the top-level directory.
12 *
13 */
14
15#include <sys/types.h>
16#include <sys/ioctl.h>
17#include <sys/mman.h>
18
19#include <linux/kvm.h>
20
21#include "qemu-common.h"
22#include "sysemu.h"
23#include "kvm.h"
24#include "cpu.h"
e22a25c9 25#include "gdbstub.h"
0e607a80 26#include "host-utils.h"
4c5b10b7 27#include "hw/pc.h"
408392b3 28#include "hw/apic.h"
35bed8ee 29#include "ioport.h"
e7701825 30#include "kvm_x86.h"
05330448 31
bb0300dc
GN
32#ifdef CONFIG_KVM_PARA
33#include <linux/kvm_para.h>
34#endif
35//
05330448
AL
36//#define DEBUG_KVM
37
38#ifdef DEBUG_KVM
8c0d577e 39#define DPRINTF(fmt, ...) \
05330448
AL
40 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
41#else
8c0d577e 42#define DPRINTF(fmt, ...) \
05330448
AL
43 do { } while (0)
44#endif
45
1a03675d
GC
46#define MSR_KVM_WALL_CLOCK 0x11
47#define MSR_KVM_SYSTEM_TIME 0x12
48
b827df58
AK
49#ifdef KVM_CAP_EXT_CPUID
50
51static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
52{
53 struct kvm_cpuid2 *cpuid;
54 int r, size;
55
56 size = sizeof(*cpuid) + max * sizeof(*cpuid->entries);
57 cpuid = (struct kvm_cpuid2 *)qemu_mallocz(size);
58 cpuid->nent = max;
59 r = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, cpuid);
76ae317f
MM
60 if (r == 0 && cpuid->nent >= max) {
61 r = -E2BIG;
62 }
b827df58
AK
63 if (r < 0) {
64 if (r == -E2BIG) {
65 qemu_free(cpuid);
66 return NULL;
67 } else {
68 fprintf(stderr, "KVM_GET_SUPPORTED_CPUID failed: %s\n",
69 strerror(-r));
70 exit(1);
71 }
72 }
73 return cpuid;
74}
75
c958a8bd
SY
76uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
77 uint32_t index, int reg)
b827df58
AK
78{
79 struct kvm_cpuid2 *cpuid;
80 int i, max;
81 uint32_t ret = 0;
82 uint32_t cpuid_1_edx;
83
84 if (!kvm_check_extension(env->kvm_state, KVM_CAP_EXT_CPUID)) {
85 return -1U;
86 }
87
88 max = 1;
89 while ((cpuid = try_get_cpuid(env->kvm_state, max)) == NULL) {
90 max *= 2;
91 }
92
93 for (i = 0; i < cpuid->nent; ++i) {
c958a8bd
SY
94 if (cpuid->entries[i].function == function &&
95 cpuid->entries[i].index == index) {
b827df58
AK
96 switch (reg) {
97 case R_EAX:
98 ret = cpuid->entries[i].eax;
99 break;
100 case R_EBX:
101 ret = cpuid->entries[i].ebx;
102 break;
103 case R_ECX:
104 ret = cpuid->entries[i].ecx;
105 break;
106 case R_EDX:
107 ret = cpuid->entries[i].edx;
19ccb8ea
JK
108 switch (function) {
109 case 1:
110 /* KVM before 2.6.30 misreports the following features */
111 ret |= CPUID_MTRR | CPUID_PAT | CPUID_MCE | CPUID_MCA;
112 break;
113 case 0x80000001:
b827df58
AK
114 /* On Intel, kvm returns cpuid according to the Intel spec,
115 * so add missing bits according to the AMD spec:
116 */
c958a8bd 117 cpuid_1_edx = kvm_arch_get_supported_cpuid(env, 1, 0, R_EDX);
c1667e40 118 ret |= cpuid_1_edx & 0x183f7ff;
19ccb8ea 119 break;
b827df58
AK
120 }
121 break;
122 }
123 }
124 }
125
126 qemu_free(cpuid);
127
128 return ret;
129}
130
131#else
132
c958a8bd
SY
133uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
134 uint32_t index, int reg)
b827df58
AK
135{
136 return -1U;
137}
138
139#endif
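/*
 * Usage sketch (an editor's illustration, not part of this file): the
 * same pattern kvm_arch_init_vcpu() applies below, clamping a guest
 * feature word to what the host kernel can actually virtualize.
 */
static void example_clamp_ext_features(CPUState *env)
{
    /* CPUID.1:ECX bits this host's KVM can provide */
    uint32_t host_ecx = kvm_arch_get_supported_cpuid(env, 1, 0, R_ECX);

    /* drop any guest-requested bits the host cannot deliver */
    env->cpuid_ext_features &= host_ecx;
}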
140
bb0300dc
GN
141#ifdef CONFIG_KVM_PARA
142struct kvm_para_features {
143 int cap;
144 int feature;
145} para_features[] = {
146#ifdef KVM_CAP_CLOCKSOURCE
147 { KVM_CAP_CLOCKSOURCE, KVM_FEATURE_CLOCKSOURCE },
148#endif
149#ifdef KVM_CAP_NOP_IO_DELAY
150 { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY },
151#endif
152#ifdef KVM_CAP_PV_MMU
153 { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP },
bb0300dc
GN
154#endif
155 { -1, -1 }
156};
157
158static int get_para_features(CPUState *env)
159{
160 int i, features = 0;
161
162 for (i = 0; i < ARRAY_SIZE(para_features) - 1; i++) {
163 if (kvm_check_extension(env->kvm_state, para_features[i].cap))
164 features |= (1 << para_features[i].feature);
165 }
166
167 return features;
168}
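/*
 * Sketch (hypothetical helper, same CONFIG_KVM_PARA scope): the bitmap
 * returned above is indexed by KVM_FEATURE_* numbers, so a kvmclock
 * probe is a single bit test.
 */
static int example_has_kvmclock(CPUState *env)
{
    return (get_para_features(env) >> KVM_FEATURE_CLOCKSOURCE) & 1;
}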
169#endif
170
e7701825
MT
171#ifdef KVM_CAP_MCE
172static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap,
173 int *max_banks)
174{
175 int r;
176
177 r = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_MCE);
178 if (r > 0) {
179 *max_banks = r;
180 return kvm_ioctl(s, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap);
181 }
182 return -ENOSYS;
183}
184
185static int kvm_setup_mce(CPUState *env, uint64_t *mcg_cap)
186{
187 return kvm_vcpu_ioctl(env, KVM_X86_SETUP_MCE, mcg_cap);
188}
189
190static int kvm_set_mce(CPUState *env, struct kvm_x86_mce *m)
191{
192 return kvm_vcpu_ioctl(env, KVM_X86_SET_MCE, m);
193}
194
195struct kvm_x86_mce_data
196{
197 CPUState *env;
198 struct kvm_x86_mce *mce;
199};
200
201static void kvm_do_inject_x86_mce(void *_data)
202{
203 struct kvm_x86_mce_data *data = _data;
204 int r;
205
206 r = kvm_set_mce(data->env, data->mce);
207 if (r < 0)
208 perror("kvm_set_mce FAILED");
209}
210#endif
211
212void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
213 uint64_t mcg_status, uint64_t addr, uint64_t misc)
214{
215#ifdef KVM_CAP_MCE
216 struct kvm_x86_mce mce = {
217 .bank = bank,
218 .status = status,
219 .mcg_status = mcg_status,
220 .addr = addr,
221 .misc = misc,
222 };
223 struct kvm_x86_mce_data data = {
224 .env = cenv,
225 .mce = &mce,
226 };
227
228 run_on_cpu(cenv, kvm_do_inject_x86_mce, &data);
229#endif
230}
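/*
 * Caller's sketch (illustrative bank and bit values, not taken from this
 * file): signalling an uncorrected memory error at a guest physical
 * address. The status word uses the architectural MCA bits VAL (63),
 * UC (61), EN (60), MISCV (59) and ADDRV (58); MCIP is bit 2 of
 * MCG_STATUS.
 */
static void example_raise_memory_mce(CPUState *cenv, uint64_t paddr)
{
    uint64_t status = (1ULL << 63) | (1ULL << 61) | (1ULL << 60)
                    | (1ULL << 59) | (1ULL << 58);
    uint64_t mcg_status = (1ULL << 2);   /* machine check in progress */

    kvm_inject_x86_mce(cenv, 9 /* example bank */, status, mcg_status,
                       paddr, 0);
}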
231
05330448
AL
232int kvm_arch_init_vcpu(CPUState *env)
233{
234 struct {
486bd5a2
AL
235 struct kvm_cpuid2 cpuid;
236 struct kvm_cpuid_entry2 entries[100];
05330448 237 } __attribute__((packed)) cpuid_data;
486bd5a2 238 uint32_t limit, i, j, cpuid_i;
a33609ca 239 uint32_t unused;
bb0300dc
GN
240 struct kvm_cpuid_entry2 *c;
241#ifdef KVM_CPUID_SIGNATURE
242 uint32_t signature[3];
243#endif
05330448 244
f8d926e9
JK
245 env->mp_state = KVM_MP_STATE_RUNNABLE;
246
c958a8bd 247 env->cpuid_features &= kvm_arch_get_supported_cpuid(env, 1, 0, R_EDX);
6c0d7ee8
AP
248
249 i = env->cpuid_ext_features & CPUID_EXT_HYPERVISOR;
c958a8bd 250 env->cpuid_ext_features &= kvm_arch_get_supported_cpuid(env, 1, 0, R_ECX);
6c0d7ee8
AP
251 env->cpuid_ext_features |= i;
252
457dfed6 253 env->cpuid_ext2_features &= kvm_arch_get_supported_cpuid(env, 0x80000001,
c958a8bd 254 0, R_EDX);
457dfed6 255 env->cpuid_ext3_features &= kvm_arch_get_supported_cpuid(env, 0x80000001,
c958a8bd 256 0, R_ECX);
296acb64
JR
257 env->cpuid_svm_features &= kvm_arch_get_supported_cpuid(env, 0x8000000A,
258 0, R_EDX);
259
6c1f42fe 260
05330448
AL
261 cpuid_i = 0;
262
bb0300dc
GN
263#ifdef CONFIG_KVM_PARA
264 /* Paravirtualization CPUIDs */
265 memcpy(signature, "KVMKVMKVM\0\0\0", 12);
266 c = &cpuid_data.entries[cpuid_i++];
267 memset(c, 0, sizeof(*c));
268 c->function = KVM_CPUID_SIGNATURE;
269 c->eax = 0;
270 c->ebx = signature[0];
271 c->ecx = signature[1];
272 c->edx = signature[2];
273
274 c = &cpuid_data.entries[cpuid_i++];
275 memset(c, 0, sizeof(*c));
276 c->function = KVM_CPUID_FEATURES;
277 c->eax = env->cpuid_kvm_features & get_para_features(env);
278#endif
279
a33609ca 280 cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
05330448
AL
281
282 for (i = 0; i <= limit; i++) {
bb0300dc 283 c = &cpuid_data.entries[cpuid_i++];
486bd5a2
AL
284
285 switch (i) {
a36b1029
AL
286 case 2: {
287 /* Keep reading function 2 until all of its output has been read */
288 int times;
289
a36b1029 290 c->function = i;
a33609ca
AL
291 c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
292 KVM_CPUID_FLAG_STATE_READ_NEXT;
293 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
294 times = c->eax & 0xff;
a36b1029
AL
295
296 for (j = 1; j < times; ++j) {
a33609ca 297 c = &cpuid_data.entries[cpuid_i++];
a36b1029 298 c->function = i;
a33609ca
AL
299 c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
300 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
a36b1029
AL
301 }
302 break;
303 }
486bd5a2
AL
304 case 4:
305 case 0xb:
306 case 0xd:
307 for (j = 0; ; j++) {
486bd5a2
AL
308 c->function = i;
309 c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
310 c->index = j;
a33609ca 311 cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
486bd5a2 312
a33609ca 313 if (i == 4 && c->eax == 0)
486bd5a2 314 break;
a33609ca 315 if (i == 0xb && !(c->ecx & 0xff00))
486bd5a2 316 break;
a33609ca 317 if (i == 0xd && c->eax == 0)
486bd5a2 318 break;
a33609ca
AL
319
320 c = &cpuid_data.entries[cpuid_i++];
486bd5a2
AL
321 }
322 break;
323 default:
486bd5a2 324 c->function = i;
a33609ca
AL
325 c->flags = 0;
326 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
486bd5a2
AL
327 break;
328 }
05330448 329 }
a33609ca 330 cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);
05330448
AL
331
332 for (i = 0x80000000; i <= limit; i++) {
bb0300dc 333 c = &cpuid_data.entries[cpuid_i++];
05330448 334
05330448 335 c->function = i;
a33609ca
AL
336 c->flags = 0;
337 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
05330448
AL
338 }
339
340 cpuid_data.cpuid.nent = cpuid_i;
341
e7701825
MT
342#ifdef KVM_CAP_MCE
343 if (((env->cpuid_version >> 8)&0xF) >= 6
344 && (env->cpuid_features&(CPUID_MCE|CPUID_MCA)) == (CPUID_MCE|CPUID_MCA)
345 && kvm_check_extension(env->kvm_state, KVM_CAP_MCE) > 0) {
346 uint64_t mcg_cap;
347 int banks;
348
349 if (kvm_get_mce_cap_supported(env->kvm_state, &mcg_cap, &banks))
350 perror("kvm_get_mce_cap_supported FAILED");
351 else {
352 if (banks > MCE_BANKS_DEF)
353 banks = MCE_BANKS_DEF;
354 mcg_cap &= MCE_CAP_DEF;
355 mcg_cap |= banks;
356 if (kvm_setup_mce(env, &mcg_cap))
357 perror("kvm_setup_mce FAILED");
358 else
359 env->mcg_cap = mcg_cap;
360 }
361 }
362#endif
363
486bd5a2 364 return kvm_vcpu_ioctl(env, KVM_SET_CPUID2, &cpuid_data);
05330448
AL
365}
366
caa5af0f
JK
367void kvm_arch_reset_vcpu(CPUState *env)
368{
e73223a5 369 env->exception_injected = -1;
0e607a80 370 env->interrupt_injected = -1;
a0fb002c
JK
371 env->nmi_injected = 0;
372 env->nmi_pending = 0;
ddced198
MT
373 if (kvm_irqchip_in_kernel()) {
374 env->mp_state = cpu_is_bsp(env) ? KVM_MP_STATE_RUNNABLE :
375 KVM_MP_STATE_UNINITIALIZED;
376 } else {
377 env->mp_state = KVM_MP_STATE_RUNNABLE;
378 }
caa5af0f
JK
379}
380
05330448
AL
381static int kvm_has_msr_star(CPUState *env)
382{
383 static int has_msr_star;
384 int ret;
385
386 /* first time */
387 if (has_msr_star == 0) {
388 struct kvm_msr_list msr_list, *kvm_msr_list;
389
390 has_msr_star = -1;
391
392 /* Obtain MSR list from KVM. These are the MSRs that we must
393 * save/restore */
4c9f7372 394 msr_list.nmsrs = 0;
05330448 395 ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list);
6fb6d245 396 if (ret < 0 && ret != -E2BIG) {
05330448 397 return 0;
6fb6d245 398 }
d9db889f
JK
399 /* Old kernel modules had a bug and could write beyond the provided
400 memory. Allocate at least 1K to be safe. */
401 kvm_msr_list = qemu_mallocz(MAX(1024, sizeof(msr_list) +
402 msr_list.nmsrs *
403 sizeof(msr_list.indices[0])));
05330448 404
55308450 405 kvm_msr_list->nmsrs = msr_list.nmsrs;
05330448
AL
406 ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
407 if (ret >= 0) {
408 int i;
409
410 for (i = 0; i < kvm_msr_list->nmsrs; i++) {
411 if (kvm_msr_list->indices[i] == MSR_STAR) {
412 has_msr_star = 1;
413 break;
414 }
415 }
416 }
417
418 qemu_free(kvm_msr_list);
419 }
420
421 if (has_msr_star == 1)
422 return 1;
423 return 0;
424}
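/*
 * The same two-step KVM_GET_MSR_INDEX_LIST dance, generalized into a
 * hypothetical probe for an arbitrary MSR index (a sketch; the 1K
 * workaround above is omitted for brevity). The first call, with
 * nmsrs = 0, fails with E2BIG but reports the required count; the
 * second call retrieves the indices themselves.
 */
static int example_kvm_msr_supported(KVMState *s, uint32_t index)
{
    struct kvm_msr_list probe, *list;
    int i, found = 0;

    probe.nmsrs = 0;
    kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, &probe);

    list = qemu_mallocz(sizeof(*list) +
                        probe.nmsrs * sizeof(list->indices[0]));
    list->nmsrs = probe.nmsrs;
    if (kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, list) >= 0) {
        for (i = 0; i < list->nmsrs; i++) {
            if (list->indices[i] == index) {
                found = 1;
                break;
            }
        }
    }
    qemu_free(list);
    return found;
}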
425
20420430
SY
426static int kvm_init_identity_map_page(KVMState *s)
427{
428#ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR
429 int ret;
430 uint64_t addr = 0xfffbc000;
431
432 if (!kvm_check_extension(s, KVM_CAP_SET_IDENTITY_MAP_ADDR)) {
433 return 0;
434 }
435
436 ret = kvm_vm_ioctl(s, KVM_SET_IDENTITY_MAP_ADDR, &addr);
437 if (ret < 0) {
438 fprintf(stderr, "kvm_set_identity_map_addr: %s\n", strerror(ret));
439 return ret;
440 }
441#endif
442 return 0;
443}
444
05330448
AL
445int kvm_arch_init(KVMState *s, int smp_cpus)
446{
447 int ret;
448
449 /* create vm86 tss. KVM uses vm86 mode to emulate 16-bit code
450 * directly. In order to use vm86 mode, a TSS is needed. Since this
451 * must be part of guest physical memory, we need to allocate it. Older
452 * versions of KVM just assumed that it would be at the end of physical
453 * memory but that doesn't work with more than 4GB of memory. We simply
454 * refuse to work with those older versions of KVM. */
984b5181 455 ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
05330448
AL
456 if (ret <= 0) {
457 fprintf(stderr, "kvm does not support KVM_CAP_SET_TSS_ADDR\n");
458 return ret;
459 }
460
461 /* this address is 3 pages before the bios, and the bios should present
462 * as unavaible memory. FIXME, need to ensure the e820 map deals with
463 * this?
464 */
4c5b10b7
JS
465 /*
466 * Tell fw_cfg to notify the BIOS to reserve the range.
467 */
468 if (e820_add_entry(0xfffbc000, 0x4000, E820_RESERVED) < 0) {
469 perror("e820_add_entry() table is full");
470 exit(1);
471 }
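    /* The 0x4000 reservation covers four pages: the identity map page at
     * 0xfffbc000 plus the three-page TSS that KVM_SET_TSS_ADDR places at
     * 0xfffbd000 below. */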
20420430
SY
472 ret = kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, 0xfffbd000);
473 if (ret < 0) {
474 return ret;
475 }
476
477 return kvm_init_identity_map_page(s);
05330448
AL
478}
479
480static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
481{
482 lhs->selector = rhs->selector;
483 lhs->base = rhs->base;
484 lhs->limit = rhs->limit;
485 lhs->type = 3;
486 lhs->present = 1;
487 lhs->dpl = 3;
488 lhs->db = 0;
489 lhs->s = 1;
490 lhs->l = 0;
491 lhs->g = 0;
492 lhs->avl = 0;
493 lhs->unusable = 0;
494}
495
496static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
497{
498 unsigned flags = rhs->flags;
499 lhs->selector = rhs->selector;
500 lhs->base = rhs->base;
501 lhs->limit = rhs->limit;
502 lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
503 lhs->present = (flags & DESC_P_MASK) != 0;
504 lhs->dpl = rhs->selector & 3;
505 lhs->db = (flags >> DESC_B_SHIFT) & 1;
506 lhs->s = (flags & DESC_S_MASK) != 0;
507 lhs->l = (flags >> DESC_L_SHIFT) & 1;
508 lhs->g = (flags & DESC_G_MASK) != 0;
509 lhs->avl = (flags & DESC_AVL_MASK) != 0;
510 lhs->unusable = 0;
511}
512
513static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
514{
515 lhs->selector = rhs->selector;
516 lhs->base = rhs->base;
517 lhs->limit = rhs->limit;
518 lhs->flags =
519 (rhs->type << DESC_TYPE_SHIFT)
520 | (rhs->present * DESC_P_MASK)
521 | (rhs->dpl << DESC_DPL_SHIFT)
522 | (rhs->db << DESC_B_SHIFT)
523 | (rhs->s * DESC_S_MASK)
524 | (rhs->l << DESC_L_SHIFT)
525 | (rhs->g * DESC_G_MASK)
526 | (rhs->avl * DESC_AVL_MASK);
527}
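/*
 * Sketch (an illustrative flat-model descriptor, not from this file):
 * how the two converters round-trip a 32-bit flat code segment.
 */
static void example_seg_roundtrip(void)
{
    SegmentCache qseg = {
        .selector = 0x08,
        .base = 0,
        .limit = 0xffffffff,
        .flags = DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK
                 | DESC_B_MASK | DESC_G_MASK,
    };
    struct kvm_segment kseg;

    set_seg(&kseg, &qseg);   /* type = 8 (code), db = 1, g = 1, dpl = 0 */
    get_seg(&qseg, &kseg);   /* flags are reassembled from those fields */
}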
528
529static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set)
530{
531 if (set)
532 *kvm_reg = *qemu_reg;
533 else
534 *qemu_reg = *kvm_reg;
535}
536
537static int kvm_getput_regs(CPUState *env, int set)
538{
539 struct kvm_regs regs;
540 int ret = 0;
541
542 if (!set) {
543 ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
544 if (ret < 0)
545 return ret;
546 }
547
548 kvm_getput_reg(&regs.rax, &env->regs[R_EAX], set);
549 kvm_getput_reg(&regs.rbx, &env->regs[R_EBX], set);
550 kvm_getput_reg(&regs.rcx, &env->regs[R_ECX], set);
551 kvm_getput_reg(&regs.rdx, &env->regs[R_EDX], set);
552 kvm_getput_reg(&regs.rsi, &env->regs[R_ESI], set);
553 kvm_getput_reg(&regs.rdi, &env->regs[R_EDI], set);
554 kvm_getput_reg(&regs.rsp, &env->regs[R_ESP], set);
555 kvm_getput_reg(&regs.rbp, &env->regs[R_EBP], set);
556#ifdef TARGET_X86_64
557 kvm_getput_reg(&regs.r8, &env->regs[8], set);
558 kvm_getput_reg(&regs.r9, &env->regs[9], set);
559 kvm_getput_reg(&regs.r10, &env->regs[10], set);
560 kvm_getput_reg(&regs.r11, &env->regs[11], set);
561 kvm_getput_reg(&regs.r12, &env->regs[12], set);
562 kvm_getput_reg(&regs.r13, &env->regs[13], set);
563 kvm_getput_reg(&regs.r14, &env->regs[14], set);
564 kvm_getput_reg(&regs.r15, &env->regs[15], set);
565#endif
566
567 kvm_getput_reg(&regs.rflags, &env->eflags, set);
568 kvm_getput_reg(&regs.rip, &env->eip, set);
569
570 if (set)
571 ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
572
573 return ret;
574}
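/*
 * One table of kvm_getput_reg() calls thus serves both directions:
 * kvm_getput_regs(env, 1) pushes QEMU state into the kernel via
 * KVM_SET_REGS, while kvm_getput_regs(env, 0) reads it back via
 * KVM_GET_REGS.
 */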
575
576static int kvm_put_fpu(CPUState *env)
577{
578 struct kvm_fpu fpu;
579 int i;
580
581 memset(&fpu, 0, sizeof fpu);
582 fpu.fsw = env->fpus & ~(7 << 11);
583 fpu.fsw |= (env->fpstt & 7) << 11;
584 fpu.fcw = env->fpuc;
585 for (i = 0; i < 8; ++i)
586 fpu.ftwx |= (!env->fptags[i]) << i;
587 memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
588 memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
589 fpu.mxcsr = env->mxcsr;
590
591 return kvm_vcpu_ioctl(env, KVM_SET_FPU, &fpu);
592}
593
f1665b21
SY
594#ifdef KVM_CAP_XSAVE
595#define XSAVE_CWD_RIP 2
596#define XSAVE_CWD_RDP 4
597#define XSAVE_MXCSR 6
598#define XSAVE_ST_SPACE 8
599#define XSAVE_XMM_SPACE 40
600#define XSAVE_XSTATE_BV 128
601#define XSAVE_YMMH_SPACE 144
602#endif
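/*
 * These indices count 32-bit words within kvm_xsave.region[] and mirror
 * the hardware XSAVE layout: XSAVE_CWD_RIP (2) is byte offset 8,
 * XSAVE_MXCSR (6) is byte 24, XSAVE_ST_SPACE (8) is byte 32 where
 * ST0..ST7 begin, XSAVE_XMM_SPACE (40) is byte 160, and
 * XSAVE_XSTATE_BV (128) is byte 512, the start of the XSAVE header.
 */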
603
604static int kvm_put_xsave(CPUState *env)
605{
606#ifdef KVM_CAP_XSAVE
607 int i;
608 struct kvm_xsave* xsave;
609 uint16_t cwd, swd, twd, fop;
610
611 if (!kvm_has_xsave())
612 return kvm_put_fpu(env);
613
614 xsave = qemu_memalign(4096, sizeof(struct kvm_xsave));
615 memset(xsave, 0, sizeof(struct kvm_xsave));
616 cwd = swd = twd = fop = 0;
617 swd = env->fpus & ~(7 << 11);
618 swd |= (env->fpstt & 7) << 11;
619 cwd = env->fpuc;
620 for (i = 0; i < 8; ++i)
621 twd |= (!env->fptags[i]) << i;
622 xsave->region[0] = (uint32_t)(swd << 16) + cwd;
623 xsave->region[1] = (uint32_t)(fop << 16) + twd;
624 memcpy(&xsave->region[XSAVE_ST_SPACE], env->fpregs,
625 sizeof env->fpregs);
626 memcpy(&xsave->region[XSAVE_XMM_SPACE], env->xmm_regs,
627 sizeof env->xmm_regs);
628 xsave->region[XSAVE_MXCSR] = env->mxcsr;
629 *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV] = env->xstate_bv;
630 memcpy(&xsave->region[XSAVE_YMMH_SPACE], env->ymmh_regs,
631 sizeof env->ymmh_regs);
632 return kvm_vcpu_ioctl(env, KVM_SET_XSAVE, xsave);
633#else
634 return kvm_put_fpu(env);
635#endif
636}
637
638static int kvm_put_xcrs(CPUState *env)
639{
640#ifdef KVM_CAP_XCRS
641 struct kvm_xcrs xcrs;
642
643 if (!kvm_has_xcrs())
644 return 0;
645
646 xcrs.nr_xcrs = 1;
647 xcrs.flags = 0;
648 xcrs.xcrs[0].xcr = 0;
649 xcrs.xcrs[0].value = env->xcr0;
650 return kvm_vcpu_ioctl(env, KVM_SET_XCRS, &xcrs);
651#else
652 return 0;
653#endif
654}
655
05330448
AL
656static int kvm_put_sregs(CPUState *env)
657{
658 struct kvm_sregs sregs;
659
0e607a80
JK
660 memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));
661 if (env->interrupt_injected >= 0) {
662 sregs.interrupt_bitmap[env->interrupt_injected / 64] |=
663 (uint64_t)1 << (env->interrupt_injected % 64);
664 }
05330448
AL
665
666 if ((env->eflags & VM_MASK)) {
667 set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
668 set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
669 set_v8086_seg(&sregs.es, &env->segs[R_ES]);
670 set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
671 set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
672 set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
673 } else {
674 set_seg(&sregs.cs, &env->segs[R_CS]);
675 set_seg(&sregs.ds, &env->segs[R_DS]);
676 set_seg(&sregs.es, &env->segs[R_ES]);
677 set_seg(&sregs.fs, &env->segs[R_FS]);
678 set_seg(&sregs.gs, &env->segs[R_GS]);
679 set_seg(&sregs.ss, &env->segs[R_SS]);
680
681 if (env->cr[0] & CR0_PE_MASK) {
682 /* force ss cpl to cs cpl */
683 sregs.ss.selector = (sregs.ss.selector & ~3) |
684 (sregs.cs.selector & 3);
685 sregs.ss.dpl = sregs.ss.selector & 3;
686 }
687 }
688
689 set_seg(&sregs.tr, &env->tr);
690 set_seg(&sregs.ldt, &env->ldt);
691
692 sregs.idt.limit = env->idt.limit;
693 sregs.idt.base = env->idt.base;
694 sregs.gdt.limit = env->gdt.limit;
695 sregs.gdt.base = env->gdt.base;
696
697 sregs.cr0 = env->cr[0];
698 sregs.cr2 = env->cr[2];
699 sregs.cr3 = env->cr[3];
700 sregs.cr4 = env->cr[4];
701
4a942cea
BS
702 sregs.cr8 = cpu_get_apic_tpr(env->apic_state);
703 sregs.apic_base = cpu_get_apic_base(env->apic_state);
05330448
AL
704
705 sregs.efer = env->efer;
706
707 return kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
708}
709
710static void kvm_msr_entry_set(struct kvm_msr_entry *entry,
711 uint32_t index, uint64_t value)
712{
713 entry->index = index;
714 entry->data = value;
715}
716
ea643051 717static int kvm_put_msrs(CPUState *env, int level)
05330448
AL
718{
719 struct {
720 struct kvm_msrs info;
721 struct kvm_msr_entry entries[100];
722 } msr_data;
723 struct kvm_msr_entry *msrs = msr_data.entries;
724 int n = 0;
725
726 kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
727 kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
728 kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
729 if (kvm_has_msr_star(env))
730 kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
05330448
AL
731#ifdef TARGET_X86_64
732 /* FIXME if lm capable */
733 kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
734 kvm_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
735 kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
736 kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
737#endif
ea643051
JK
738 if (level == KVM_PUT_FULL_STATE) {
739 kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
740 kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME,
741 env->system_time_msr);
742 kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
743 }
1a03675d 744
05330448
AL
745 msr_data.info.nmsrs = n;
746
747 return kvm_vcpu_ioctl(env, KVM_SET_MSRS, &msr_data);
748
749}
750
751
752static int kvm_get_fpu(CPUState *env)
753{
754 struct kvm_fpu fpu;
755 int i, ret;
756
757 ret = kvm_vcpu_ioctl(env, KVM_GET_FPU, &fpu);
758 if (ret < 0)
759 return ret;
760
761 env->fpstt = (fpu.fsw >> 11) & 7;
762 env->fpus = fpu.fsw;
763 env->fpuc = fpu.fcw;
764 for (i = 0; i < 8; ++i)
765 env->fptags[i] = !((fpu.ftwx >> i) & 1);
766 memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
767 memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);
768 env->mxcsr = fpu.mxcsr;
769
770 return 0;
771}
772
f1665b21
SY
773static int kvm_get_xsave(CPUState *env)
774{
775#ifdef KVM_CAP_XSAVE
776 struct kvm_xsave* xsave;
777 int ret, i;
778 uint16_t cwd, swd, twd, fop;
779
780 if (!kvm_has_xsave())
781 return kvm_get_fpu(env);
782
783 xsave = qemu_memalign(4096, sizeof(struct kvm_xsave));
784 ret = kvm_vcpu_ioctl(env, KVM_GET_XSAVE, xsave);
785 if (ret < 0)
786 return ret;
787
788 cwd = (uint16_t)xsave->region[0];
789 swd = (uint16_t)(xsave->region[0] >> 16);
790 twd = (uint16_t)xsave->region[1];
791 fop = (uint16_t)(xsave->region[1] >> 16);
792 env->fpstt = (swd >> 11) & 7;
793 env->fpus = swd;
794 env->fpuc = cwd;
795 for (i = 0; i < 8; ++i)
796 env->fptags[i] = !((twd >> i) & 1);
797 env->mxcsr = xsave->region[XSAVE_MXCSR];
798 memcpy(env->fpregs, &xsave->region[XSAVE_ST_SPACE],
799 sizeof env->fpregs);
800 memcpy(env->xmm_regs, &xsave->region[XSAVE_XMM_SPACE],
801 sizeof env->xmm_regs);
802 env->xstate_bv = *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV];
803 memcpy(env->ymmh_regs, &xsave->region[XSAVE_YMMH_SPACE],
804 sizeof env->ymmh_regs);
805 return 0;
806#else
807 return kvm_get_fpu(env);
808#endif
809}
810
811static int kvm_get_xcrs(CPUState *env)
812{
813#ifdef KVM_CAP_XCRS
814 int i, ret;
815 struct kvm_xcrs xcrs;
816
817 if (!kvm_has_xcrs())
818 return 0;
819
820 ret = kvm_vcpu_ioctl(env, KVM_GET_XCRS, &xcrs);
821 if (ret < 0)
822 return ret;
823
824 for (i = 0; i < xcrs.nr_xcrs; i++)
825 /* Only support xcr0 now */
826 if (xcrs.xcrs[0].xcr == 0) {
827 env->xcr0 = xcrs.xcrs[0].value;
828 break;
829 }
830 return 0;
831#else
832 return 0;
833#endif
834}
835
05330448
AL
836static int kvm_get_sregs(CPUState *env)
837{
838 struct kvm_sregs sregs;
839 uint32_t hflags;
0e607a80 840 int bit, i, ret;
05330448
AL
841
842 ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
843 if (ret < 0)
844 return ret;
845
0e607a80
JK
846 /* There can only be one pending IRQ set in the bitmap at a time, so try
847 to find it and save its number instead (-1 for none). */
848 env->interrupt_injected = -1;
849 for (i = 0; i < ARRAY_SIZE(sregs.interrupt_bitmap); i++) {
850 if (sregs.interrupt_bitmap[i]) {
851 bit = ctz64(sregs.interrupt_bitmap[i]);
852 env->interrupt_injected = i * 64 + bit;
853 break;
854 }
855 }
05330448
AL
856
857 get_seg(&env->segs[R_CS], &sregs.cs);
858 get_seg(&env->segs[R_DS], &sregs.ds);
859 get_seg(&env->segs[R_ES], &sregs.es);
860 get_seg(&env->segs[R_FS], &sregs.fs);
861 get_seg(&env->segs[R_GS], &sregs.gs);
862 get_seg(&env->segs[R_SS], &sregs.ss);
863
864 get_seg(&env->tr, &sregs.tr);
865 get_seg(&env->ldt, &sregs.ldt);
866
867 env->idt.limit = sregs.idt.limit;
868 env->idt.base = sregs.idt.base;
869 env->gdt.limit = sregs.gdt.limit;
870 env->gdt.base = sregs.gdt.base;
871
872 env->cr[0] = sregs.cr0;
873 env->cr[2] = sregs.cr2;
874 env->cr[3] = sregs.cr3;
875 env->cr[4] = sregs.cr4;
876
4a942cea 877 cpu_set_apic_base(env->apic_state, sregs.apic_base);
05330448
AL
878
879 env->efer = sregs.efer;
4a942cea 880 //cpu_set_apic_tpr(env->apic_state, sregs.cr8);
05330448
AL
881
882#define HFLAG_COPY_MASK ~( \
883 HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
884 HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
885 HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
886 HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
887
888
889
890 hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
891 hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
892 hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
893 (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
894 hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
895 hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
896 (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);
897
898 if (env->efer & MSR_EFER_LMA) {
899 hflags |= HF_LMA_MASK;
900 }
901
902 if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
903 hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
904 } else {
905 hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
906 (DESC_B_SHIFT - HF_CS32_SHIFT);
907 hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
908 (DESC_B_SHIFT - HF_SS32_SHIFT);
909 if (!(env->cr[0] & CR0_PE_MASK) ||
910 (env->eflags & VM_MASK) ||
911 !(hflags & HF_CS32_MASK)) {
912 hflags |= HF_ADDSEG_MASK;
913 } else {
914 hflags |= ((env->segs[R_DS].base |
915 env->segs[R_ES].base |
916 env->segs[R_SS].base) != 0) <<
917 HF_ADDSEG_SHIFT;
918 }
919 }
920 env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
05330448
AL
921
922 return 0;
923}
924
925static int kvm_get_msrs(CPUState *env)
926{
927 struct {
928 struct kvm_msrs info;
929 struct kvm_msr_entry entries[100];
930 } msr_data;
931 struct kvm_msr_entry *msrs = msr_data.entries;
932 int ret, i, n;
933
934 n = 0;
935 msrs[n++].index = MSR_IA32_SYSENTER_CS;
936 msrs[n++].index = MSR_IA32_SYSENTER_ESP;
937 msrs[n++].index = MSR_IA32_SYSENTER_EIP;
938 if (kvm_has_msr_star(env))
939 msrs[n++].index = MSR_STAR;
940 msrs[n++].index = MSR_IA32_TSC;
941#ifdef TARGET_X86_64
942 /* FIXME lm_capable_kernel */
943 msrs[n++].index = MSR_CSTAR;
944 msrs[n++].index = MSR_KERNELGSBASE;
945 msrs[n++].index = MSR_FMASK;
946 msrs[n++].index = MSR_LSTAR;
947#endif
1a03675d
GC
948 msrs[n++].index = MSR_KVM_SYSTEM_TIME;
949 msrs[n++].index = MSR_KVM_WALL_CLOCK;
950
05330448
AL
951 msr_data.info.nmsrs = n;
952 ret = kvm_vcpu_ioctl(env, KVM_GET_MSRS, &msr_data);
953 if (ret < 0)
954 return ret;
955
956 for (i = 0; i < ret; i++) {
957 switch (msrs[i].index) {
958 case MSR_IA32_SYSENTER_CS:
959 env->sysenter_cs = msrs[i].data;
960 break;
961 case MSR_IA32_SYSENTER_ESP:
962 env->sysenter_esp = msrs[i].data;
963 break;
964 case MSR_IA32_SYSENTER_EIP:
965 env->sysenter_eip = msrs[i].data;
966 break;
967 case MSR_STAR:
968 env->star = msrs[i].data;
969 break;
970#ifdef TARGET_X86_64
971 case MSR_CSTAR:
972 env->cstar = msrs[i].data;
973 break;
974 case MSR_KERNELGSBASE:
975 env->kernelgsbase = msrs[i].data;
976 break;
977 case MSR_FMASK:
978 env->fmask = msrs[i].data;
979 break;
980 case MSR_LSTAR:
981 env->lstar = msrs[i].data;
982 break;
983#endif
984 case MSR_IA32_TSC:
985 env->tsc = msrs[i].data;
986 break;
1a03675d
GC
987 case MSR_KVM_SYSTEM_TIME:
988 env->system_time_msr = msrs[i].data;
989 break;
990 case MSR_KVM_WALL_CLOCK:
991 env->wall_clock_msr = msrs[i].data;
992 break;
05330448
AL
993 }
994 }
995
996 return 0;
997}
998
9bdbe550
HB
999static int kvm_put_mp_state(CPUState *env)
1000{
1001 struct kvm_mp_state mp_state = { .mp_state = env->mp_state };
1002
1003 return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, &mp_state);
1004}
1005
1006static int kvm_get_mp_state(CPUState *env)
1007{
1008 struct kvm_mp_state mp_state;
1009 int ret;
1010
1011 ret = kvm_vcpu_ioctl(env, KVM_GET_MP_STATE, &mp_state);
1012 if (ret < 0) {
1013 return ret;
1014 }
1015 env->mp_state = mp_state.mp_state;
1016 return 0;
1017}
1018
ea643051 1019static int kvm_put_vcpu_events(CPUState *env, int level)
a0fb002c
JK
1020{
1021#ifdef KVM_CAP_VCPU_EVENTS
1022 struct kvm_vcpu_events events;
1023
1024 if (!kvm_has_vcpu_events()) {
1025 return 0;
1026 }
1027
31827373
JK
1028 events.exception.injected = (env->exception_injected >= 0);
1029 events.exception.nr = env->exception_injected;
a0fb002c
JK
1030 events.exception.has_error_code = env->has_error_code;
1031 events.exception.error_code = env->error_code;
1032
1033 events.interrupt.injected = (env->interrupt_injected >= 0);
1034 events.interrupt.nr = env->interrupt_injected;
1035 events.interrupt.soft = env->soft_interrupt;
1036
1037 events.nmi.injected = env->nmi_injected;
1038 events.nmi.pending = env->nmi_pending;
1039 events.nmi.masked = !!(env->hflags2 & HF2_NMI_MASK);
1040
1041 events.sipi_vector = env->sipi_vector;
1042
ea643051
JK
1043 events.flags = 0;
1044 if (level >= KVM_PUT_RESET_STATE) {
1045 events.flags |=
1046 KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR;
1047 }
aee028b9 1048
a0fb002c
JK
1049 return kvm_vcpu_ioctl(env, KVM_SET_VCPU_EVENTS, &events);
1050#else
1051 return 0;
1052#endif
1053}
1054
1055static int kvm_get_vcpu_events(CPUState *env)
1056{
1057#ifdef KVM_CAP_VCPU_EVENTS
1058 struct kvm_vcpu_events events;
1059 int ret;
1060
1061 if (!kvm_has_vcpu_events()) {
1062 return 0;
1063 }
1064
1065 ret = kvm_vcpu_ioctl(env, KVM_GET_VCPU_EVENTS, &events);
1066 if (ret < 0) {
1067 return ret;
1068 }
31827373 1069 env->exception_injected =
a0fb002c
JK
1070 events.exception.injected ? events.exception.nr : -1;
1071 env->has_error_code = events.exception.has_error_code;
1072 env->error_code = events.exception.error_code;
1073
1074 env->interrupt_injected =
1075 events.interrupt.injected ? events.interrupt.nr : -1;
1076 env->soft_interrupt = events.interrupt.soft;
1077
1078 env->nmi_injected = events.nmi.injected;
1079 env->nmi_pending = events.nmi.pending;
1080 if (events.nmi.masked) {
1081 env->hflags2 |= HF2_NMI_MASK;
1082 } else {
1083 env->hflags2 &= ~HF2_NMI_MASK;
1084 }
1085
1086 env->sipi_vector = events.sipi_vector;
1087#endif
1088
1089 return 0;
1090}
1091
b0b1d690
JK
1092static int kvm_guest_debug_workarounds(CPUState *env)
1093{
1094 int ret = 0;
1095#ifdef KVM_CAP_SET_GUEST_DEBUG
1096 unsigned long reinject_trap = 0;
1097
1098 if (!kvm_has_vcpu_events()) {
1099 if (env->exception_injected == 1) {
1100 reinject_trap = KVM_GUESTDBG_INJECT_DB;
1101 } else if (env->exception_injected == 3) {
1102 reinject_trap = KVM_GUESTDBG_INJECT_BP;
1103 }
1104 env->exception_injected = -1;
1105 }
1106
1107 /*
1108 * Kernels before KVM_CAP_X86_ROBUST_SINGLESTEP overwrote flags.TF
1109 * injected via SET_GUEST_DEBUG while updating GP regs. Work around this
1110 * by updating the debug state once again if single-stepping is on.
1111 * Another reason to call kvm_update_guest_debug here is a pending debug
1112 * trap raised by the guest. On kernels without SET_VCPU_EVENTS we have to
1113 * reinject them via SET_GUEST_DEBUG.
1114 */
1115 if (reinject_trap ||
1116 (!kvm_has_robust_singlestep() && env->singlestep_enabled)) {
1117 ret = kvm_update_guest_debug(env, reinject_trap);
1118 }
1119#endif /* KVM_CAP_SET_GUEST_DEBUG */
1120 return ret;
1121}
1122
ff44f1a3
JK
1123static int kvm_put_debugregs(CPUState *env)
1124{
1125#ifdef KVM_CAP_DEBUGREGS
1126 struct kvm_debugregs dbgregs;
1127 int i;
1128
1129 if (!kvm_has_debugregs()) {
1130 return 0;
1131 }
1132
1133 for (i = 0; i < 4; i++) {
1134 dbgregs.db[i] = env->dr[i];
1135 }
1136 dbgregs.dr6 = env->dr[6];
1137 dbgregs.dr7 = env->dr[7];
1138 dbgregs.flags = 0;
1139
1140 return kvm_vcpu_ioctl(env, KVM_SET_DEBUGREGS, &dbgregs);
1141#else
1142 return 0;
1143#endif
1144}
1145
1146static int kvm_get_debugregs(CPUState *env)
1147{
1148#ifdef KVM_CAP_DEBUGREGS
1149 struct kvm_debugregs dbgregs;
1150 int i, ret;
1151
1152 if (!kvm_has_debugregs()) {
1153 return 0;
1154 }
1155
1156 ret = kvm_vcpu_ioctl(env, KVM_GET_DEBUGREGS, &dbgregs);
1157 if (ret < 0) {
1158 return ret;
1159 }
1160 for (i = 0; i < 4; i++) {
1161 env->dr[i] = dbgregs.db[i];
1162 }
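    /* DR4/DR5 alias DR6/DR7 when CR4.DE is clear, hence the mirroring. */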
1163 env->dr[4] = env->dr[6] = dbgregs.dr6;
1164 env->dr[5] = env->dr[7] = dbgregs.dr7;
1165#endif
1166
1167 return 0;
1168}
1169
ea375f9a 1170int kvm_arch_put_registers(CPUState *env, int level)
05330448
AL
1171{
1172 int ret;
1173
dbaa07c4
JK
1174 assert(cpu_is_stopped(env) || qemu_cpu_self(env));
1175
05330448
AL
1176 ret = kvm_getput_regs(env, 1);
1177 if (ret < 0)
1178 return ret;
1179
f1665b21
SY
1180 ret = kvm_put_xsave(env);
1181 if (ret < 0)
1182 return ret;
1183
1184 ret = kvm_put_xcrs(env);
05330448
AL
1185 if (ret < 0)
1186 return ret;
1187
1188 ret = kvm_put_sregs(env);
1189 if (ret < 0)
1190 return ret;
1191
ea643051 1192 ret = kvm_put_msrs(env, level);
05330448
AL
1193 if (ret < 0)
1194 return ret;
1195
ea643051
JK
1196 if (level >= KVM_PUT_RESET_STATE) {
1197 ret = kvm_put_mp_state(env);
1198 if (ret < 0)
1199 return ret;
1200 }
f8d926e9 1201
ea643051 1202 ret = kvm_put_vcpu_events(env, level);
a0fb002c
JK
1203 if (ret < 0)
1204 return ret;
1205
b0b1d690
JK
1206 /* must be last */
1207 ret = kvm_guest_debug_workarounds(env);
1208 if (ret < 0)
1209 return ret;
1210
ff44f1a3
JK
1211 ret = kvm_put_debugregs(env);
1212 if (ret < 0)
1213 return ret;
1214
05330448
AL
1215 return 0;
1216}
1217
1218int kvm_arch_get_registers(CPUState *env)
1219{
1220 int ret;
1221
dbaa07c4
JK
1222 assert(cpu_is_stopped(env) || qemu_cpu_self(env));
1223
05330448
AL
1224 ret = kvm_getput_regs(env, 0);
1225 if (ret < 0)
1226 return ret;
1227
f1665b21
SY
1228 ret = kvm_get_xsave(env);
1229 if (ret < 0)
1230 return ret;
1231
1232 ret = kvm_get_xcrs(env);
05330448
AL
1233 if (ret < 0)
1234 return ret;
1235
1236 ret = kvm_get_sregs(env);
1237 if (ret < 0)
1238 return ret;
1239
1240 ret = kvm_get_msrs(env);
1241 if (ret < 0)
1242 return ret;
1243
5a2e3c2e
JK
1244 ret = kvm_get_mp_state(env);
1245 if (ret < 0)
1246 return ret;
1247
a0fb002c
JK
1248 ret = kvm_get_vcpu_events(env);
1249 if (ret < 0)
1250 return ret;
1251
ff44f1a3
JK
1252 ret = kvm_get_debugregs(env);
1253 if (ret < 0)
1254 return ret;
1255
05330448
AL
1256 return 0;
1257}
1258
1259int kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
1260{
1261 /* Try to inject an interrupt if the guest can accept it */
1262 if (run->ready_for_interrupt_injection &&
1263 (env->interrupt_request & CPU_INTERRUPT_HARD) &&
1264 (env->eflags & IF_MASK)) {
1265 int irq;
1266
1267 env->interrupt_request &= ~CPU_INTERRUPT_HARD;
1268 irq = cpu_get_pic_interrupt(env);
1269 if (irq >= 0) {
1270 struct kvm_interrupt intr;
1271 intr.irq = irq;
1272 /* FIXME: errors */
8c0d577e 1273 DPRINTF("injected interrupt %d\n", irq);
05330448
AL
1274 kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr);
1275 }
1276 }
1277
1278 /* If we have an interrupt but the guest is not ready to receive an
1279 * interrupt, request an interrupt window exit. This will
1280 * cause a return to userspace as soon as the guest is ready to
1281 * receive interrupts. */
1282 if ((env->interrupt_request & CPU_INTERRUPT_HARD))
1283 run->request_interrupt_window = 1;
1284 else
1285 run->request_interrupt_window = 0;
1286
8c0d577e 1287 DPRINTF("setting tpr\n");
4a942cea 1288 run->cr8 = cpu_get_apic_tpr(env->apic_state);
05330448
AL
1289
1290 return 0;
1291}
1292
1293int kvm_arch_post_run(CPUState *env, struct kvm_run *run)
1294{
1295 if (run->if_flag)
1296 env->eflags |= IF_MASK;
1297 else
1298 env->eflags &= ~IF_MASK;
1299
4a942cea
BS
1300 cpu_set_apic_tpr(env->apic_state, run->cr8);
1301 cpu_set_apic_base(env->apic_state, run->apic_base);
05330448
AL
1302
1303 return 0;
1304}
1305
0af691d7
MT
1306int kvm_arch_process_irqchip_events(CPUState *env)
1307{
1308 if (env->interrupt_request & CPU_INTERRUPT_INIT) {
1309 kvm_cpu_synchronize_state(env);
1310 do_cpu_init(env);
1311 env->exception_index = EXCP_HALTED;
1312 }
1313
1314 if (env->interrupt_request & CPU_INTERRUPT_SIPI) {
1315 kvm_cpu_synchronize_state(env);
1316 do_cpu_sipi(env);
1317 }
1318
1319 return env->halted;
1320}
1321
05330448
AL
1322static int kvm_handle_halt(CPUState *env)
1323{
1324 if (!((env->interrupt_request & CPU_INTERRUPT_HARD) &&
1325 (env->eflags & IF_MASK)) &&
1326 !(env->interrupt_request & CPU_INTERRUPT_NMI)) {
1327 env->halted = 1;
1328 env->exception_index = EXCP_HLT;
1329 return 0;
1330 }
1331
1332 return 1;
1333}
1334
1335int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run)
1336{
1337 int ret = 0;
1338
1339 switch (run->exit_reason) {
1340 case KVM_EXIT_HLT:
8c0d577e 1341 DPRINTF("handle_hlt\n");
05330448
AL
1342 ret = kvm_handle_halt(env);
1343 break;
1344 }
1345
1346 return ret;
1347}
e22a25c9
AL
1348
1349#ifdef KVM_CAP_SET_GUEST_DEBUG
e22a25c9
AL
1350int kvm_arch_insert_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp)
1351{
38972938 1352 static const uint8_t int3 = 0xcc;
64bf3f4e 1353
e22a25c9 1354 if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) ||
64bf3f4e 1355 cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&int3, 1, 1))
e22a25c9
AL
1356 return -EINVAL;
1357 return 0;
1358}
1359
1360int kvm_arch_remove_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp)
1361{
1362 uint8_t int3;
1363
1364 if (cpu_memory_rw_debug(env, bp->pc, &int3, 1, 0) || int3 != 0xcc ||
64bf3f4e 1365 cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1))
e22a25c9
AL
1366 return -EINVAL;
1367 return 0;
1368}
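/*
 * Lifecycle note: insertion saves the original byte at bp->pc and patches
 * in the int3 opcode (0xcc); removal first verifies that the trap byte is
 * still in place before restoring the saved instruction byte.
 */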
1369
1370static struct {
1371 target_ulong addr;
1372 int len;
1373 int type;
1374} hw_breakpoint[4];
1375
1376static int nb_hw_breakpoint;
1377
1378static int find_hw_breakpoint(target_ulong addr, int len, int type)
1379{
1380 int n;
1381
1382 for (n = 0; n < nb_hw_breakpoint; n++)
1383 if (hw_breakpoint[n].addr == addr && hw_breakpoint[n].type == type &&
1384 (hw_breakpoint[n].len == len || len == -1))
1385 return n;
1386 return -1;
1387}
1388
1389int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1390 target_ulong len, int type)
1391{
1392 switch (type) {
1393 case GDB_BREAKPOINT_HW:
1394 len = 1;
1395 break;
1396 case GDB_WATCHPOINT_WRITE:
1397 case GDB_WATCHPOINT_ACCESS:
1398 switch (len) {
1399 case 1:
1400 break;
1401 case 2:
1402 case 4:
1403 case 8:
1404 if (addr & (len - 1))
1405 return -EINVAL;
1406 break;
1407 default:
1408 return -EINVAL;
1409 }
1410 break;
1411 default:
1412 return -ENOSYS;
1413 }
1414
1415 if (nb_hw_breakpoint == 4)
1416 return -ENOBUFS;
1417
1418 if (find_hw_breakpoint(addr, len, type) >= 0)
1419 return -EEXIST;
1420
1421 hw_breakpoint[nb_hw_breakpoint].addr = addr;
1422 hw_breakpoint[nb_hw_breakpoint].len = len;
1423 hw_breakpoint[nb_hw_breakpoint].type = type;
1424 nb_hw_breakpoint++;
1425
1426 return 0;
1427}
1428
1429int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1430 target_ulong len, int type)
1431{
1432 int n;
1433
1434 n = find_hw_breakpoint(addr, (type == GDB_BREAKPOINT_HW) ? 1 : len, type);
1435 if (n < 0)
1436 return -ENOENT;
1437
1438 nb_hw_breakpoint--;
1439 hw_breakpoint[n] = hw_breakpoint[nb_hw_breakpoint];
1440
1441 return 0;
1442}
1443
1444void kvm_arch_remove_all_hw_breakpoints(void)
1445{
1446 nb_hw_breakpoint = 0;
1447}
1448
1449static CPUWatchpoint hw_watchpoint;
1450
1451int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info)
1452{
1453 int handle = 0;
1454 int n;
1455
1456 if (arch_info->exception == 1) {
1457 if (arch_info->dr6 & (1 << 14)) {
1458 if (cpu_single_env->singlestep_enabled)
1459 handle = 1;
1460 } else {
1461 for (n = 0; n < 4; n++)
1462 if (arch_info->dr6 & (1 << n))
1463 switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) {
1464 case 0x0:
1465 handle = 1;
1466 break;
1467 case 0x1:
1468 handle = 1;
1469 cpu_single_env->watchpoint_hit = &hw_watchpoint;
1470 hw_watchpoint.vaddr = hw_breakpoint[n].addr;
1471 hw_watchpoint.flags = BP_MEM_WRITE;
1472 break;
1473 case 0x3:
1474 handle = 1;
1475 cpu_single_env->watchpoint_hit = &hw_watchpoint;
1476 hw_watchpoint.vaddr = hw_breakpoint[n].addr;
1477 hw_watchpoint.flags = BP_MEM_ACCESS;
1478 break;
1479 }
1480 }
1481 } else if (kvm_find_sw_breakpoint(cpu_single_env, arch_info->pc))
1482 handle = 1;
1483
b0b1d690
JK
1484 if (!handle) {
1485 cpu_synchronize_state(cpu_single_env);
1486 assert(cpu_single_env->exception_injected == -1);
1487
1488 cpu_single_env->exception_injected = arch_info->exception;
1489 cpu_single_env->has_error_code = 0;
1490 }
e22a25c9
AL
1491
1492 return handle;
1493}
1494
1495void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg)
1496{
1497 const uint8_t type_code[] = {
1498 [GDB_BREAKPOINT_HW] = 0x0,
1499 [GDB_WATCHPOINT_WRITE] = 0x1,
1500 [GDB_WATCHPOINT_ACCESS] = 0x3
1501 };
1502 const uint8_t len_code[] = {
1503 [1] = 0x0, [2] = 0x1, [4] = 0x3, [8] = 0x2
1504 };
1505 int n;
1506
1507 if (kvm_sw_breakpoints_active(env))
1508 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1509
1510 if (nb_hw_breakpoint > 0) {
1511 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1512 dbg->arch.debugreg[7] = 0x0600;
1513 for (n = 0; n < nb_hw_breakpoint; n++) {
1514 dbg->arch.debugreg[n] = hw_breakpoint[n].addr;
1515 dbg->arch.debugreg[7] |= (2 << (n * 2)) |
1516 (type_code[hw_breakpoint[n].type] << (16 + n*4)) |
1517 (len_code[hw_breakpoint[n].len] << (18 + n*4));
1518 }
1519 }
f1665b21
SY
1520 /* Legal xcr0 for loading */
1521 env->xcr0 = 1;
e22a25c9
AL
1522}
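/*
 * Worked example of the encoding above (illustrative values): a single
 * 4-byte write watchpoint in slot 0 sets
 *     (2 << 0)        the global-enable bit G0
 *   | (0x1 << 16)     R/W0 = 01, break on data writes
 *   | (0x3 << 18)     LEN0 = 11, 4-byte range
 * on top of the 0x0600 base, giving dbg->arch.debugreg[7] = 0x000d0602.
 */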
1523#endif /* KVM_CAP_SET_GUEST_DEBUG */
4513d923
GN
1524
1525bool kvm_arch_stop_on_emulation_error(CPUState *env)
1526{
1527 return !(env->cr[0] & CR0_PE_MASK) ||
1528 ((env->segs[R_CS].selector & 3) != 3);
1529}
1530