]> git.proxmox.com Git - qemu.git/blob - target-ppc/kvm.c
Merge remote-tracking branch 'bonzini/migration-writev' into staging
[qemu.git] / target-ppc / kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
22
23 #include <linux/kvm.h>
24
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "hw/sysbus.h"
34 #include "hw/ppc/spapr.h"
35 #include "mmu-hash64.h"
36
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "sysemu/watchdog.h"
41
42 //#define DEBUG_KVM
43
#ifdef DEBUG_KVM
/* Debug build: forward the format string and its arguments to stderr. */
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
/* Non-debug build: expands to an empty statement, so the arguments are
 * dropped by the preprocessor and never evaluated. */
#define dprintf(fmt, ...) \
    do { } while (0)
#endif
51
52 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
53
/* Capability table exported for this target.  It contains nothing but the
 * KVM_CAP_LAST_INFO entry (presumably the list terminator -- confirm against
 * the generic KVM code that consumes it). */
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};
57
/* Cached results of host capability probes.  Every flag except cap_papr is
 * assigned in kvm_arch_init() from kvm_check_extension(); cap_papr is not
 * assigned there (see the note in kvm_arch_init()). */
static int cap_interrupt_unset = false; /* KVM_CAP_PPC_UNSET_IRQ */
static int cap_interrupt_level = false; /* KVM_CAP_PPC_IRQ_LEVEL */
static int cap_segstate;                /* KVM_CAP_PPC_SEGSTATE */
static int cap_booke_sregs;             /* KVM_CAP_PPC_BOOKE_SREGS */
static int cap_ppc_smt;                 /* KVM_CAP_PPC_SMT */
static int cap_ppc_rma;                 /* KVM_CAP_PPC_RMA */
static int cap_spapr_tce;               /* KVM_CAP_SPAPR_TCE */
static int cap_hior;                    /* KVM_CAP_PPC_HIOR */
static int cap_one_reg;                 /* KVM_CAP_ONE_REG */
static int cap_epr;                     /* KVM_CAP_PPC_EPR */
static int cap_ppc_watchdog;            /* KVM_CAP_PPC_BOOKE_WATCHDOG */
static int cap_papr;                    /* not probed in kvm_arch_init() */
70
/* XXX We have a race condition where we actually have a level-triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that
 *     there's still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we
 *     injected an interrupt.  That way we can ensure that we're always
 *     reinjecting interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;
81
82 static void kvm_kick_cpu(void *opaque)
83 {
84 PowerPCCPU *cpu = opaque;
85
86 qemu_cpu_kick(CPU(cpu));
87 }
88
89 static int kvm_ppc_register_host_cpu_type(void);
90
91 int kvm_arch_init(KVMState *s)
92 {
93 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
94 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
95 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
96 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
97 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
98 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
99 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
100 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
101 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
102 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
103 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
104 /* Note: we don't set cap_papr here, because this capability is
105 * only activated after this by kvmppc_set_papr() */
106
107 if (!cap_interrupt_level) {
108 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
109 "VM to stall at times!\n");
110 }
111
112 kvm_ppc_register_host_cpu_type();
113
114 return 0;
115 }
116
117 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
118 {
119 CPUPPCState *cenv = &cpu->env;
120 CPUState *cs = CPU(cpu);
121 struct kvm_sregs sregs;
122 int ret;
123
124 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
125 /* What we're really trying to say is "if we're on BookE, we use
126 the native PVR for now". This is the only sane way to check
127 it though, so we potentially confuse users that they can run
128 BookE guests on BookS. Let's hope nobody dares enough :) */
129 return 0;
130 } else {
131 if (!cap_segstate) {
132 fprintf(stderr, "kvm error: missing PVR setting capability\n");
133 return -ENOSYS;
134 }
135 }
136
137 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
138 if (ret) {
139 return ret;
140 }
141
142 sregs.pvr = cenv->spr[SPR_PVR];
143 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
144 }
145
146 /* Set up a shared TLB array with KVM */
147 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
148 {
149 CPUPPCState *env = &cpu->env;
150 CPUState *cs = CPU(cpu);
151 struct kvm_book3e_206_tlb_params params = {};
152 struct kvm_config_tlb cfg = {};
153 struct kvm_enable_cap encap = {};
154 unsigned int entries = 0;
155 int ret, i;
156
157 if (!kvm_enabled() ||
158 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
159 return 0;
160 }
161
162 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
163
164 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
165 params.tlb_sizes[i] = booke206_tlb_size(env, i);
166 params.tlb_ways[i] = booke206_tlb_ways(env, i);
167 entries += params.tlb_sizes[i];
168 }
169
170 assert(entries == env->nb_tlb);
171 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
172
173 env->tlb_dirty = true;
174
175 cfg.array = (uintptr_t)env->tlb.tlbm;
176 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
177 cfg.params = (uintptr_t)&params;
178 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
179
180 encap.cap = KVM_CAP_SW_TLB;
181 encap.args[0] = (uintptr_t)&cfg;
182
183 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
184 if (ret < 0) {
185 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
186 __func__, strerror(-ret));
187 return ret;
188 }
189
190 env->kvm_sw_tlb = true;
191 return 0;
192 }
193
194
195 #if defined(TARGET_PPC64)
196 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
197 struct kvm_ppc_smmu_info *info)
198 {
199 CPUPPCState *env = &cpu->env;
200 CPUState *cs = CPU(cpu);
201
202 memset(info, 0, sizeof(*info));
203
204 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
205 * need to "guess" what the supported page sizes are.
206 *
207 * For that to work we make a few assumptions:
208 *
209 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
210 * KVM which only supports 4K and 16M pages, but supports them
211 * regardless of the backing store characteritics. We also don't
212 * support 1T segments.
213 *
214 * This is safe as if HV KVM ever supports that capability or PR
215 * KVM grows supports for more page/segment sizes, those versions
216 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
217 * will not hit this fallback
218 *
219 * - Else we are running HV KVM. This means we only support page
220 * sizes that fit in the backing store. Additionally we only
221 * advertize 64K pages if the processor is ARCH 2.06 and we assume
222 * P7 encodings for the SLB and hash table. Here too, we assume
223 * support for any newer processor will mean a kernel that
224 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
225 * this fallback.
226 */
227 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
228 /* No flags */
229 info->flags = 0;
230 info->slb_size = 64;
231
232 /* Standard 4k base page size segment */
233 info->sps[0].page_shift = 12;
234 info->sps[0].slb_enc = 0;
235 info->sps[0].enc[0].page_shift = 12;
236 info->sps[0].enc[0].pte_enc = 0;
237
238 /* Standard 16M large page size segment */
239 info->sps[1].page_shift = 24;
240 info->sps[1].slb_enc = SLB_VSID_L;
241 info->sps[1].enc[0].page_shift = 24;
242 info->sps[1].enc[0].pte_enc = 0;
243 } else {
244 int i = 0;
245
246 /* HV KVM has backing store size restrictions */
247 info->flags = KVM_PPC_PAGE_SIZES_REAL;
248
249 if (env->mmu_model & POWERPC_MMU_1TSEG) {
250 info->flags |= KVM_PPC_1T_SEGMENTS;
251 }
252
253 if (env->mmu_model == POWERPC_MMU_2_06) {
254 info->slb_size = 32;
255 } else {
256 info->slb_size = 64;
257 }
258
259 /* Standard 4k base page size segment */
260 info->sps[i].page_shift = 12;
261 info->sps[i].slb_enc = 0;
262 info->sps[i].enc[0].page_shift = 12;
263 info->sps[i].enc[0].pte_enc = 0;
264 i++;
265
266 /* 64K on MMU 2.06 */
267 if (env->mmu_model == POWERPC_MMU_2_06) {
268 info->sps[i].page_shift = 16;
269 info->sps[i].slb_enc = 0x110;
270 info->sps[i].enc[0].page_shift = 16;
271 info->sps[i].enc[0].pte_enc = 1;
272 i++;
273 }
274
275 /* Standard 16M large page size segment */
276 info->sps[i].page_shift = 24;
277 info->sps[i].slb_enc = SLB_VSID_L;
278 info->sps[i].enc[0].page_shift = 24;
279 info->sps[i].enc[0].pte_enc = 0;
280 }
281 }
282
283 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
284 {
285 CPUState *cs = CPU(cpu);
286 int ret;
287
288 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
289 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
290 if (ret == 0) {
291 return;
292 }
293 }
294
295 kvm_get_fallback_smmu_info(cpu, info);
296 }
297
298 static long getrampagesize(void)
299 {
300 struct statfs fs;
301 int ret;
302
303 if (!mem_path) {
304 /* guest RAM is backed by normal anonymous pages */
305 return getpagesize();
306 }
307
308 do {
309 ret = statfs(mem_path, &fs);
310 } while (ret != 0 && errno == EINTR);
311
312 if (ret != 0) {
313 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
314 strerror(errno));
315 exit(1);
316 }
317
318 #define HUGETLBFS_MAGIC 0x958458f6
319
320 if (fs.f_type != HUGETLBFS_MAGIC) {
321 /* Explicit mempath, but it's ordinary pages */
322 return getpagesize();
323 }
324
325 /* It's hugepage, return the huge page size */
326 return fs.f_bsize;
327 }
328
329 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
330 {
331 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
332 return true;
333 }
334
335 return (1ul << shift) <= rampgsize;
336 }
337
338 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
339 {
340 static struct kvm_ppc_smmu_info smmu_info;
341 static bool has_smmu_info;
342 CPUPPCState *env = &cpu->env;
343 long rampagesize;
344 int iq, ik, jq, jk;
345
346 /* We only handle page sizes for 64-bit server guests for now */
347 if (!(env->mmu_model & POWERPC_MMU_64)) {
348 return;
349 }
350
351 /* Collect MMU info from kernel if not already */
352 if (!has_smmu_info) {
353 kvm_get_smmu_info(cpu, &smmu_info);
354 has_smmu_info = true;
355 }
356
357 rampagesize = getrampagesize();
358
359 /* Convert to QEMU form */
360 memset(&env->sps, 0, sizeof(env->sps));
361
362 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
363 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
364 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
365
366 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
367 ksps->page_shift)) {
368 continue;
369 }
370 qsps->page_shift = ksps->page_shift;
371 qsps->slb_enc = ksps->slb_enc;
372 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
373 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
374 ksps->enc[jk].page_shift)) {
375 continue;
376 }
377 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
378 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
379 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
380 break;
381 }
382 }
383 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
384 break;
385 }
386 }
387 env->slb_nr = smmu_info.slb_size;
388 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
389 env->mmu_model |= POWERPC_MMU_1TSEG;
390 } else {
391 env->mmu_model &= ~POWERPC_MMU_1TSEG;
392 }
393 }
394 #else /* defined (TARGET_PPC64) */
395
396 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
397 {
398 }
399
400 #endif /* !defined (TARGET_PPC64) */
401
402 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
403 {
404 return cpu->cpu_index;
405 }
406
407 int kvm_arch_init_vcpu(CPUState *cs)
408 {
409 PowerPCCPU *cpu = POWERPC_CPU(cs);
410 CPUPPCState *cenv = &cpu->env;
411 int ret;
412
413 /* Gather server mmu info from KVM and update the CPU state */
414 kvm_fixup_page_sizes(cpu);
415
416 /* Synchronize sregs with kvm */
417 ret = kvm_arch_sync_sregs(cpu);
418 if (ret) {
419 return ret;
420 }
421
422 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);
423
424 /* Some targets support access to KVM's guest TLB. */
425 switch (cenv->mmu_model) {
426 case POWERPC_MMU_BOOKE206:
427 ret = kvm_booke206_tlb_init(cpu);
428 break;
429 default:
430 break;
431 }
432
433 return ret;
434 }
435
436 void kvm_arch_reset_vcpu(CPUState *cpu)
437 {
438 }
439
440 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
441 {
442 CPUPPCState *env = &cpu->env;
443 CPUState *cs = CPU(cpu);
444 struct kvm_dirty_tlb dirty_tlb;
445 unsigned char *bitmap;
446 int ret;
447
448 if (!env->kvm_sw_tlb) {
449 return;
450 }
451
452 bitmap = g_malloc((env->nb_tlb + 7) / 8);
453 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
454
455 dirty_tlb.bitmap = (uintptr_t)bitmap;
456 dirty_tlb.num_dirty = env->nb_tlb;
457
458 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
459 if (ret) {
460 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
461 __func__, strerror(-ret));
462 }
463
464 g_free(bitmap);
465 }
466
467 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
468 {
469 PowerPCCPU *cpu = POWERPC_CPU(cs);
470 CPUPPCState *env = &cpu->env;
471 union {
472 uint32_t u32;
473 uint64_t u64;
474 } val;
475 struct kvm_one_reg reg = {
476 .id = id,
477 .addr = (uintptr_t) &val,
478 };
479 int ret;
480
481 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
482 if (ret != 0) {
483 fprintf(stderr, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
484 spr, strerror(errno));
485 } else {
486 switch (id & KVM_REG_SIZE_MASK) {
487 case KVM_REG_SIZE_U32:
488 env->spr[spr] = val.u32;
489 break;
490
491 case KVM_REG_SIZE_U64:
492 env->spr[spr] = val.u64;
493 break;
494
495 default:
496 /* Don't handle this size yet */
497 abort();
498 }
499 }
500 }
501
502 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
503 {
504 PowerPCCPU *cpu = POWERPC_CPU(cs);
505 CPUPPCState *env = &cpu->env;
506 union {
507 uint32_t u32;
508 uint64_t u64;
509 } val;
510 struct kvm_one_reg reg = {
511 .id = id,
512 .addr = (uintptr_t) &val,
513 };
514 int ret;
515
516 switch (id & KVM_REG_SIZE_MASK) {
517 case KVM_REG_SIZE_U32:
518 val.u32 = env->spr[spr];
519 break;
520
521 case KVM_REG_SIZE_U64:
522 val.u64 = env->spr[spr];
523 break;
524
525 default:
526 /* Don't handle this size yet */
527 abort();
528 }
529
530 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
531 if (ret != 0) {
532 fprintf(stderr, "Warning: Unable to set SPR %d to KVM: %s\n",
533 spr, strerror(errno));
534 }
535 }
536
537 static int kvm_put_fp(CPUState *cs)
538 {
539 PowerPCCPU *cpu = POWERPC_CPU(cs);
540 CPUPPCState *env = &cpu->env;
541 struct kvm_one_reg reg;
542 int i;
543 int ret;
544
545 if (env->insns_flags & PPC_FLOAT) {
546 uint64_t fpscr = env->fpscr;
547 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
548
549 reg.id = KVM_REG_PPC_FPSCR;
550 reg.addr = (uintptr_t)&fpscr;
551 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
552 if (ret < 0) {
553 dprintf("Unable to set FPSCR to KVM: %s\n", strerror(errno));
554 return ret;
555 }
556
557 for (i = 0; i < 32; i++) {
558 uint64_t vsr[2];
559
560 vsr[0] = float64_val(env->fpr[i]);
561 vsr[1] = env->vsr[i];
562 reg.addr = (uintptr_t) &vsr;
563 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
564
565 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
566 if (ret < 0) {
567 dprintf("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
568 i, strerror(errno));
569 return ret;
570 }
571 }
572 }
573
574 if (env->insns_flags & PPC_ALTIVEC) {
575 reg.id = KVM_REG_PPC_VSCR;
576 reg.addr = (uintptr_t)&env->vscr;
577 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
578 if (ret < 0) {
579 dprintf("Unable to set VSCR to KVM: %s\n", strerror(errno));
580 return ret;
581 }
582
583 for (i = 0; i < 32; i++) {
584 reg.id = KVM_REG_PPC_VR(i);
585 reg.addr = (uintptr_t)&env->avr[i];
586 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
587 if (ret < 0) {
588 dprintf("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
589 return ret;
590 }
591 }
592 }
593
594 return 0;
595 }
596
597 static int kvm_get_fp(CPUState *cs)
598 {
599 PowerPCCPU *cpu = POWERPC_CPU(cs);
600 CPUPPCState *env = &cpu->env;
601 struct kvm_one_reg reg;
602 int i;
603 int ret;
604
605 if (env->insns_flags & PPC_FLOAT) {
606 uint64_t fpscr;
607 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
608
609 reg.id = KVM_REG_PPC_FPSCR;
610 reg.addr = (uintptr_t)&fpscr;
611 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
612 if (ret < 0) {
613 dprintf("Unable to get FPSCR from KVM: %s\n", strerror(errno));
614 return ret;
615 } else {
616 env->fpscr = fpscr;
617 }
618
619 for (i = 0; i < 32; i++) {
620 uint64_t vsr[2];
621
622 reg.addr = (uintptr_t) &vsr;
623 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
624
625 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
626 if (ret < 0) {
627 dprintf("Unable to get %s%d from KVM: %s\n",
628 vsx ? "VSR" : "FPR", i, strerror(errno));
629 return ret;
630 } else {
631 env->fpr[i] = vsr[0];
632 if (vsx) {
633 env->vsr[i] = vsr[1];
634 }
635 }
636 }
637 }
638
639 if (env->insns_flags & PPC_ALTIVEC) {
640 reg.id = KVM_REG_PPC_VSCR;
641 reg.addr = (uintptr_t)&env->vscr;
642 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
643 if (ret < 0) {
644 dprintf("Unable to get VSCR from KVM: %s\n", strerror(errno));
645 return ret;
646 }
647
648 for (i = 0; i < 32; i++) {
649 reg.id = KVM_REG_PPC_VR(i);
650 reg.addr = (uintptr_t)&env->avr[i];
651 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
652 if (ret < 0) {
653 dprintf("Unable to get VR%d from KVM: %s\n",
654 i, strerror(errno));
655 return ret;
656 }
657 }
658 }
659
660 return 0;
661 }
662
663 #if defined(TARGET_PPC64)
664 static int kvm_get_vpa(CPUState *cs)
665 {
666 PowerPCCPU *cpu = POWERPC_CPU(cs);
667 CPUPPCState *env = &cpu->env;
668 struct kvm_one_reg reg;
669 int ret;
670
671 reg.id = KVM_REG_PPC_VPA_ADDR;
672 reg.addr = (uintptr_t)&env->vpa_addr;
673 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
674 if (ret < 0) {
675 dprintf("Unable to get VPA address from KVM: %s\n", strerror(errno));
676 return ret;
677 }
678
679 assert((uintptr_t)&env->slb_shadow_size
680 == ((uintptr_t)&env->slb_shadow_addr + 8));
681 reg.id = KVM_REG_PPC_VPA_SLB;
682 reg.addr = (uintptr_t)&env->slb_shadow_addr;
683 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
684 if (ret < 0) {
685 dprintf("Unable to get SLB shadow state from KVM: %s\n",
686 strerror(errno));
687 return ret;
688 }
689
690 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
691 reg.id = KVM_REG_PPC_VPA_DTL;
692 reg.addr = (uintptr_t)&env->dtl_addr;
693 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
694 if (ret < 0) {
695 dprintf("Unable to get dispatch trace log state from KVM: %s\n",
696 strerror(errno));
697 return ret;
698 }
699
700 return 0;
701 }
702
703 static int kvm_put_vpa(CPUState *cs)
704 {
705 PowerPCCPU *cpu = POWERPC_CPU(cs);
706 CPUPPCState *env = &cpu->env;
707 struct kvm_one_reg reg;
708 int ret;
709
710 /* SLB shadow or DTL can't be registered unless a master VPA is
711 * registered. That means when restoring state, if a VPA *is*
712 * registered, we need to set that up first. If not, we need to
713 * deregister the others before deregistering the master VPA */
714 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
715
716 if (env->vpa_addr) {
717 reg.id = KVM_REG_PPC_VPA_ADDR;
718 reg.addr = (uintptr_t)&env->vpa_addr;
719 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
720 if (ret < 0) {
721 dprintf("Unable to set VPA address to KVM: %s\n", strerror(errno));
722 return ret;
723 }
724 }
725
726 assert((uintptr_t)&env->slb_shadow_size
727 == ((uintptr_t)&env->slb_shadow_addr + 8));
728 reg.id = KVM_REG_PPC_VPA_SLB;
729 reg.addr = (uintptr_t)&env->slb_shadow_addr;
730 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
731 if (ret < 0) {
732 dprintf("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
733 return ret;
734 }
735
736 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
737 reg.id = KVM_REG_PPC_VPA_DTL;
738 reg.addr = (uintptr_t)&env->dtl_addr;
739 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
740 if (ret < 0) {
741 dprintf("Unable to set dispatch trace log state to KVM: %s\n",
742 strerror(errno));
743 return ret;
744 }
745
746 if (!env->vpa_addr) {
747 reg.id = KVM_REG_PPC_VPA_ADDR;
748 reg.addr = (uintptr_t)&env->vpa_addr;
749 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
750 if (ret < 0) {
751 dprintf("Unable to set VPA address to KVM: %s\n", strerror(errno));
752 return ret;
753 }
754 }
755
756 return 0;
757 }
758 #endif /* TARGET_PPC64 */
759
760 int kvm_arch_put_registers(CPUState *cs, int level)
761 {
762 PowerPCCPU *cpu = POWERPC_CPU(cs);
763 CPUPPCState *env = &cpu->env;
764 struct kvm_regs regs;
765 int ret;
766 int i;
767
768 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
769 if (ret < 0) {
770 return ret;
771 }
772
773 regs.ctr = env->ctr;
774 regs.lr = env->lr;
775 regs.xer = cpu_read_xer(env);
776 regs.msr = env->msr;
777 regs.pc = env->nip;
778
779 regs.srr0 = env->spr[SPR_SRR0];
780 regs.srr1 = env->spr[SPR_SRR1];
781
782 regs.sprg0 = env->spr[SPR_SPRG0];
783 regs.sprg1 = env->spr[SPR_SPRG1];
784 regs.sprg2 = env->spr[SPR_SPRG2];
785 regs.sprg3 = env->spr[SPR_SPRG3];
786 regs.sprg4 = env->spr[SPR_SPRG4];
787 regs.sprg5 = env->spr[SPR_SPRG5];
788 regs.sprg6 = env->spr[SPR_SPRG6];
789 regs.sprg7 = env->spr[SPR_SPRG7];
790
791 regs.pid = env->spr[SPR_BOOKE_PID];
792
793 for (i = 0;i < 32; i++)
794 regs.gpr[i] = env->gpr[i];
795
796 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
797 if (ret < 0)
798 return ret;
799
800 kvm_put_fp(cs);
801
802 if (env->tlb_dirty) {
803 kvm_sw_tlb_put(cpu);
804 env->tlb_dirty = false;
805 }
806
807 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
808 struct kvm_sregs sregs;
809
810 sregs.pvr = env->spr[SPR_PVR];
811
812 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
813
814 /* Sync SLB */
815 #ifdef TARGET_PPC64
816 for (i = 0; i < 64; i++) {
817 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
818 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
819 }
820 #endif
821
822 /* Sync SRs */
823 for (i = 0; i < 16; i++) {
824 sregs.u.s.ppc32.sr[i] = env->sr[i];
825 }
826
827 /* Sync BATs */
828 for (i = 0; i < 8; i++) {
829 /* Beware. We have to swap upper and lower bits here */
830 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
831 | env->DBAT[1][i];
832 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
833 | env->IBAT[1][i];
834 }
835
836 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
837 if (ret) {
838 return ret;
839 }
840 }
841
842 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
843 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
844 }
845
846 if (cap_one_reg) {
847 int i;
848
849 /* We deliberately ignore errors here, for kernels which have
850 * the ONE_REG calls, but don't support the specific
851 * registers, there's a reasonable chance things will still
852 * work, at least until we try to migrate. */
853 for (i = 0; i < 1024; i++) {
854 uint64_t id = env->spr_cb[i].one_reg_id;
855
856 if (id != 0) {
857 kvm_put_one_spr(cs, id, i);
858 }
859 }
860
861 #ifdef TARGET_PPC64
862 if (cap_papr) {
863 if (kvm_put_vpa(cs) < 0) {
864 dprintf("Warning: Unable to set VPA information to KVM\n");
865 }
866 }
867 #endif /* TARGET_PPC64 */
868 }
869
870 return ret;
871 }
872
873 int kvm_arch_get_registers(CPUState *cs)
874 {
875 PowerPCCPU *cpu = POWERPC_CPU(cs);
876 CPUPPCState *env = &cpu->env;
877 struct kvm_regs regs;
878 struct kvm_sregs sregs;
879 uint32_t cr;
880 int i, ret;
881
882 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
883 if (ret < 0)
884 return ret;
885
886 cr = regs.cr;
887 for (i = 7; i >= 0; i--) {
888 env->crf[i] = cr & 15;
889 cr >>= 4;
890 }
891
892 env->ctr = regs.ctr;
893 env->lr = regs.lr;
894 cpu_write_xer(env, regs.xer);
895 env->msr = regs.msr;
896 env->nip = regs.pc;
897
898 env->spr[SPR_SRR0] = regs.srr0;
899 env->spr[SPR_SRR1] = regs.srr1;
900
901 env->spr[SPR_SPRG0] = regs.sprg0;
902 env->spr[SPR_SPRG1] = regs.sprg1;
903 env->spr[SPR_SPRG2] = regs.sprg2;
904 env->spr[SPR_SPRG3] = regs.sprg3;
905 env->spr[SPR_SPRG4] = regs.sprg4;
906 env->spr[SPR_SPRG5] = regs.sprg5;
907 env->spr[SPR_SPRG6] = regs.sprg6;
908 env->spr[SPR_SPRG7] = regs.sprg7;
909
910 env->spr[SPR_BOOKE_PID] = regs.pid;
911
912 for (i = 0;i < 32; i++)
913 env->gpr[i] = regs.gpr[i];
914
915 kvm_get_fp(cs);
916
917 if (cap_booke_sregs) {
918 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
919 if (ret < 0) {
920 return ret;
921 }
922
923 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
924 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
925 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
926 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
927 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
928 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
929 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
930 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
931 env->spr[SPR_DECR] = sregs.u.e.dec;
932 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
933 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
934 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
935 }
936
937 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
938 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
939 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
940 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
941 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
942 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
943 }
944
945 if (sregs.u.e.features & KVM_SREGS_E_64) {
946 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
947 }
948
949 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
950 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
951 }
952
953 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
954 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
955 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
956 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
957 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
958 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
959 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
960 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
961 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
962 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
963 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
964 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
965 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
966 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
967 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
968 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
969 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
970
971 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
972 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
973 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
974 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
975 }
976
977 if (sregs.u.e.features & KVM_SREGS_E_PM) {
978 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
979 }
980
981 if (sregs.u.e.features & KVM_SREGS_E_PC) {
982 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
983 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
984 }
985 }
986
987 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
988 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
989 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
990 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
991 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
992 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
993 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
994 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
995 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
996 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
997 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
998 }
999
1000 if (sregs.u.e.features & KVM_SREGS_EXP) {
1001 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1002 }
1003
1004 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1005 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1006 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1007 }
1008
1009 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1010 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1011 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1012 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1013
1014 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1015 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1016 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1017 }
1018 }
1019 }
1020
1021 if (cap_segstate) {
1022 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1023 if (ret < 0) {
1024 return ret;
1025 }
1026
1027 ppc_store_sdr1(env, sregs.u.s.sdr1);
1028
1029 /* Sync SLB */
1030 #ifdef TARGET_PPC64
1031 for (i = 0; i < 64; i++) {
1032 ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
1033 sregs.u.s.ppc64.slb[i].slbv);
1034 }
1035 #endif
1036
1037 /* Sync SRs */
1038 for (i = 0; i < 16; i++) {
1039 env->sr[i] = sregs.u.s.ppc32.sr[i];
1040 }
1041
1042 /* Sync BATs */
1043 for (i = 0; i < 8; i++) {
1044 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1045 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1046 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1047 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1048 }
1049 }
1050
1051 if (cap_hior) {
1052 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1053 }
1054
1055 if (cap_one_reg) {
1056 int i;
1057
1058 /* We deliberately ignore errors here, for kernels which have
1059 * the ONE_REG calls, but don't support the specific
1060 * registers, there's a reasonable chance things will still
1061 * work, at least until we try to migrate. */
1062 for (i = 0; i < 1024; i++) {
1063 uint64_t id = env->spr_cb[i].one_reg_id;
1064
1065 if (id != 0) {
1066 kvm_get_one_spr(cs, id, i);
1067 }
1068 }
1069
1070 #ifdef TARGET_PPC64
1071 if (cap_papr) {
1072 if (kvm_get_vpa(cs) < 0) {
1073 dprintf("Warning: Unable to get VPA information from KVM\n");
1074 }
1075 }
1076 #endif
1077 }
1078
1079 return 0;
1080 }
1081
1082 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1083 {
1084 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1085
1086 if (irq != PPC_INTERRUPT_EXT) {
1087 return 0;
1088 }
1089
1090 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1091 return 0;
1092 }
1093
1094 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1095
1096 return 0;
1097 }
1098
1099 #if defined(TARGET_PPCEMB)
1100 #define PPC_INPUT_INT PPC40x_INPUT_INT
1101 #elif defined(TARGET_PPC64)
1102 #define PPC_INPUT_INT PPC970_INPUT_INT
1103 #else
1104 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1105 #endif
1106
1107 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1108 {
1109 PowerPCCPU *cpu = POWERPC_CPU(cs);
1110 CPUPPCState *env = &cpu->env;
1111 int r;
1112 unsigned irq;
1113
1114 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1115 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1116 if (!cap_interrupt_level &&
1117 run->ready_for_interrupt_injection &&
1118 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1119 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1120 {
1121 /* For now KVM disregards the 'irq' argument. However, in the
1122 * future KVM could cache it in-kernel to avoid a heavyweight exit
1123 * when reading the UIC.
1124 */
1125 irq = KVM_INTERRUPT_SET;
1126
1127 dprintf("injected interrupt %d\n", irq);
1128 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1129 if (r < 0) {
1130 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1131 }
1132
1133 /* Always wake up soon in case the interrupt was level based */
1134 qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
1135 (get_ticks_per_sec() / 50));
1136 }
1137
1138 /* We don't know if there are more interrupts pending after this. However,
1139 * the guest will return to userspace in the course of handling this one
1140 * anyways, so we will get a chance to deliver the rest. */
1141 }
1142
1143 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1144 {
1145 }
1146
1147 int kvm_arch_process_async_events(CPUState *cs)
1148 {
1149 return cs->halted;
1150 }
1151
1152 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1153 {
1154 CPUState *cs = CPU(cpu);
1155 CPUPPCState *env = &cpu->env;
1156
1157 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1158 cs->halted = 1;
1159 env->exception_index = EXCP_HLT;
1160 }
1161
1162 return 0;
1163 }
1164
1165 /* map dcr access to existing qemu dcr emulation */
1166 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1167 {
1168 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1169 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1170
1171 return 0;
1172 }
1173
1174 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1175 {
1176 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1177 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1178
1179 return 0;
1180 }
1181
1182 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1183 {
1184 PowerPCCPU *cpu = POWERPC_CPU(cs);
1185 CPUPPCState *env = &cpu->env;
1186 int ret;
1187
1188 switch (run->exit_reason) {
1189 case KVM_EXIT_DCR:
1190 if (run->dcr.is_write) {
1191 dprintf("handle dcr write\n");
1192 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1193 } else {
1194 dprintf("handle dcr read\n");
1195 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1196 }
1197 break;
1198 case KVM_EXIT_HLT:
1199 dprintf("handle halt\n");
1200 ret = kvmppc_handle_halt(cpu);
1201 break;
1202 #if defined(TARGET_PPC64)
1203 case KVM_EXIT_PAPR_HCALL:
1204 dprintf("handle PAPR hypercall\n");
1205 run->papr_hcall.ret = spapr_hypercall(cpu,
1206 run->papr_hcall.nr,
1207 run->papr_hcall.args);
1208 ret = 0;
1209 break;
1210 #endif
1211 case KVM_EXIT_EPR:
1212 dprintf("handle epr\n");
1213 run->epr.epr = ldl_phys(env->mpic_iack);
1214 ret = 0;
1215 break;
1216 case KVM_EXIT_WATCHDOG:
1217 dprintf("handle watchdog expiry\n");
1218 watchdog_perform_action();
1219 ret = 0;
1220 break;
1221
1222 default:
1223 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1224 ret = -1;
1225 break;
1226 }
1227
1228 return ret;
1229 }
1230
1231 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1232 {
1233 CPUState *cs = CPU(cpu);
1234 uint32_t bits = tsr_bits;
1235 struct kvm_one_reg reg = {
1236 .id = KVM_REG_PPC_OR_TSR,
1237 .addr = (uintptr_t) &bits,
1238 };
1239
1240 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1241 }
1242
1243 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1244 {
1245
1246 CPUState *cs = CPU(cpu);
1247 uint32_t bits = tsr_bits;
1248 struct kvm_one_reg reg = {
1249 .id = KVM_REG_PPC_CLEAR_TSR,
1250 .addr = (uintptr_t) &bits,
1251 };
1252
1253 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1254 }
1255
1256 int kvmppc_set_tcr(PowerPCCPU *cpu)
1257 {
1258 CPUState *cs = CPU(cpu);
1259 CPUPPCState *env = &cpu->env;
1260 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1261
1262 struct kvm_one_reg reg = {
1263 .id = KVM_REG_PPC_TCR,
1264 .addr = (uintptr_t) &tcr,
1265 };
1266
1267 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1268 }
1269
1270 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1271 {
1272 CPUState *cs = CPU(cpu);
1273 struct kvm_enable_cap encap = {};
1274 int ret;
1275
1276 if (!kvm_enabled()) {
1277 return -1;
1278 }
1279
1280 if (!cap_ppc_watchdog) {
1281 printf("warning: KVM does not support watchdog");
1282 return -1;
1283 }
1284
1285 encap.cap = KVM_CAP_PPC_BOOKE_WATCHDOG;
1286 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
1287 if (ret < 0) {
1288 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1289 __func__, strerror(-ret));
1290 return ret;
1291 }
1292
1293 return ret;
1294 }
1295
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy
 * the whole line into @value (at most @len bytes, via pstrcpy).
 *
 * Returns 0 when a matching line was found, -1 when the file can't be
 * opened or no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int prefix_len = strlen(field);
    int ret = -1;
    FILE *cpuinfo = fopen("/proc/cpuinfo", "r");

    if (cpuinfo == NULL) {
        return -1;
    }

    while (fgets(line, sizeof(line), cpuinfo)) {
        if (strncmp(line, field, prefix_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
        if (line[0] == '\0') {
            /* An empty read also ends the scan */
            break;
        }
    }

    fclose(cpuinfo);

    return ret;
}
1323
/*
 * Return the value that follows the ':' on the "timebase" line of
 * /proc/cpuinfo.
 *
 * Falls back to get_ticks_per_sec() when the line is missing, has no ':',
 * or the text after the ':' is not a decimal number that fits in 32 bits.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    char *end;
    unsigned long freq;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    ns = strchr(line, ':');
    if (!ns) {
        return retval;
    }

    ns++;

    /* strtoul, unlike atoi, lets us detect "no digits" and overflow instead
     * of silently producing 0 or an undefined result */
    errno = 0;
    freq = strtoul(ns, &end, 10);
    if (end == ns || errno == ERANGE || freq > UINT32_MAX) {
        return retval;
    }

    retval = freq;
    return retval;
}
1343
1344 /* Try to find a device tree node for a CPU with clock-frequency property */
1345 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1346 {
1347 struct dirent *dirp;
1348 DIR *dp;
1349
1350 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1351 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1352 return -1;
1353 }
1354
1355 buf[0] = '\0';
1356 while ((dirp = readdir(dp)) != NULL) {
1357 FILE *f;
1358 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1359 dirp->d_name);
1360 f = fopen(buf, "r");
1361 if (f) {
1362 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1363 fclose(f);
1364 break;
1365 }
1366 buf[0] = '\0';
1367 }
1368 closedir(dp);
1369 if (buf[0] == '\0') {
1370 printf("Unknown host!\n");
1371 return -1;
1372 }
1373
1374 return 0;
1375 }
1376
1377 /* Read a CPU node property from the host device tree that's a single
1378 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1379 * (can't find or open the property, or doesn't understand the
1380 * format) */
1381 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1382 {
1383 char buf[PATH_MAX];
1384 union {
1385 uint32_t v32;
1386 uint64_t v64;
1387 } u;
1388 FILE *f;
1389 int len;
1390
1391 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1392 return -1;
1393 }
1394
1395 strncat(buf, "/", sizeof(buf) - strlen(buf));
1396 strncat(buf, propname, sizeof(buf) - strlen(buf));
1397
1398 f = fopen(buf, "rb");
1399 if (!f) {
1400 return -1;
1401 }
1402
1403 len = fread(&u, 1, sizeof(u), f);
1404 fclose(f);
1405 switch (len) {
1406 case 4:
1407 /* property is a 32-bit quantity */
1408 return be32_to_cpu(u.v32);
1409 case 8:
1410 return be64_to_cpu(u.v64);
1411 }
1412
1413 return 0;
1414 }
1415
/* Return the host CPU node's "clock-frequency" device-tree property, as
 * produced by kvmppc_read_int_cpu_dt(). */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
1420
/* Return the host CPU node's "ibm,vmx" device-tree property, truncated to
 * 32 bits. */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,vmx");

    return prop;
}
1425
/* Return the host CPU node's "ibm,dfp" device-tree property, truncated to
 * 32 bits. */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,dfp");

    return prop;
}
1430
1431 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1432 {
1433 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1434 CPUState *cs = CPU(cpu);
1435
1436 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1437 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1438 return 0;
1439 }
1440
1441 return 1;
1442 }
1443
1444 int kvmppc_get_hasidle(CPUPPCState *env)
1445 {
1446 struct kvm_ppc_pvinfo pvinfo;
1447
1448 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1449 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1450 return 1;
1451 }
1452
1453 return 0;
1454 }
1455
1456 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1457 {
1458 uint32_t *hc = (uint32_t*)buf;
1459 struct kvm_ppc_pvinfo pvinfo;
1460
1461 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1462 memcpy(buf, pvinfo.hcall, buf_len);
1463 return 0;
1464 }
1465
1466 /*
1467 * Fallback to always fail hypercalls:
1468 *
1469 * li r3, -1
1470 * nop
1471 * nop
1472 * nop
1473 */
1474
1475 hc[0] = 0x3860ffff;
1476 hc[1] = 0x60000000;
1477 hc[2] = 0x60000000;
1478 hc[3] = 0x60000000;
1479
1480 return 0;
1481 }
1482
1483 void kvmppc_set_papr(PowerPCCPU *cpu)
1484 {
1485 CPUPPCState *env = &cpu->env;
1486 CPUState *cs = CPU(cpu);
1487 struct kvm_enable_cap cap = {};
1488 int ret;
1489
1490 cap.cap = KVM_CAP_PPC_PAPR;
1491 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1492
1493 if (ret) {
1494 cpu_abort(env, "This KVM version does not support PAPR\n");
1495 }
1496
1497 /* Update the capability flag so we sync the right information
1498 * with kvm */
1499 cap_papr = 1;
1500 }
1501
1502 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1503 {
1504 CPUPPCState *env = &cpu->env;
1505 CPUState *cs = CPU(cpu);
1506 struct kvm_enable_cap cap = {};
1507 int ret;
1508
1509 cap.cap = KVM_CAP_PPC_EPR;
1510 cap.args[0] = mpic_proxy;
1511 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1512
1513 if (ret && mpic_proxy) {
1514 cpu_abort(env, "This KVM version does not support EPR\n");
1515 }
1516 }
1517
1518 int kvmppc_smt_threads(void)
1519 {
1520 return cap_ppc_smt ? cap_ppc_smt : 1;
1521 }
1522
1523 #ifdef TARGET_PPC64
1524 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1525 {
1526 void *rma;
1527 off_t size;
1528 int fd;
1529 struct kvm_allocate_rma ret;
1530 MemoryRegion *rma_region;
1531
1532 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1533 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1534 * not necessary on this hardware
1535 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1536 *
1537 * FIXME: We should allow the user to force contiguous RMA
1538 * allocation in the cap_ppc_rma==1 case.
1539 */
1540 if (cap_ppc_rma < 2) {
1541 return 0;
1542 }
1543
1544 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1545 if (fd < 0) {
1546 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1547 strerror(errno));
1548 return -1;
1549 }
1550
1551 size = MIN(ret.rma_size, 256ul << 20);
1552
1553 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1554 if (rma == MAP_FAILED) {
1555 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1556 return -1;
1557 };
1558
1559 rma_region = g_new(MemoryRegion, 1);
1560 memory_region_init_ram_ptr(rma_region, name, size, rma);
1561 vmstate_register_ram_global(rma_region);
1562 memory_region_add_subregion(sysmem, 0, rma_region);
1563
1564 return size;
1565 }
1566
1567 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1568 {
1569 struct kvm_ppc_smmu_info info;
1570 long rampagesize, best_page_shift;
1571 int i;
1572
1573 if (cap_ppc_rma >= 2) {
1574 return current_size;
1575 }
1576
1577 /* Find the largest hardware supported page size that's less than
1578 * or equal to the (logical) backing page size of guest RAM */
1579 kvm_get_smmu_info(ppc_env_get_cpu(first_cpu), &info);
1580 rampagesize = getrampagesize();
1581 best_page_shift = 0;
1582
1583 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1584 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
1585
1586 if (!sps->page_shift) {
1587 continue;
1588 }
1589
1590 if ((sps->page_shift > best_page_shift)
1591 && ((1UL << sps->page_shift) <= rampagesize)) {
1592 best_page_shift = sps->page_shift;
1593 }
1594 }
1595
1596 return MIN(current_size,
1597 1ULL << (best_page_shift + hash_shift - 7));
1598 }
1599 #endif
1600
1601 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1602 {
1603 struct kvm_create_spapr_tce args = {
1604 .liobn = liobn,
1605 .window_size = window_size,
1606 };
1607 long len;
1608 int fd;
1609 void *table;
1610
1611 /* Must set fd to -1 so we don't try to munmap when called for
1612 * destroying the table, which the upper layers -will- do
1613 */
1614 *pfd = -1;
1615 if (!cap_spapr_tce) {
1616 return NULL;
1617 }
1618
1619 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1620 if (fd < 0) {
1621 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1622 liobn);
1623 return NULL;
1624 }
1625
1626 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1627 /* FIXME: round this up to page size */
1628
1629 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1630 if (table == MAP_FAILED) {
1631 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1632 liobn);
1633 close(fd);
1634 return NULL;
1635 }
1636
1637 *pfd = fd;
1638 return table;
1639 }
1640
1641 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1642 {
1643 long len;
1644
1645 if (fd < 0) {
1646 return -1;
1647 }
1648
1649 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
1650 if ((munmap(table, len) < 0) ||
1651 (close(fd) < 0)) {
1652 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1653 strerror(errno));
1654 /* Leak the table */
1655 }
1656
1657 return 0;
1658 }
1659
1660 int kvmppc_reset_htab(int shift_hint)
1661 {
1662 uint32_t shift = shift_hint;
1663
1664 if (!kvm_enabled()) {
1665 /* Full emulation, tell caller to allocate htab itself */
1666 return 0;
1667 }
1668 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1669 int ret;
1670 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1671 if (ret == -ENOTTY) {
1672 /* At least some versions of PR KVM advertise the
1673 * capability, but don't implement the ioctl(). Oops.
1674 * Return 0 so that we allocate the htab in qemu, as is
1675 * correct for PR. */
1676 return 0;
1677 } else if (ret < 0) {
1678 return ret;
1679 }
1680 return shift;
1681 }
1682
1683 /* We have a kernel that predates the htab reset calls. For PR
1684 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1685 * this era, it has allocated a 16MB fixed size hash table
1686 * already. Kernels of this era have the GET_PVINFO capability
1687 * only on PR, so we use this hack to determine the right
1688 * answer */
1689 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1690 /* PR - tell caller to allocate htab */
1691 return 0;
1692 } else {
1693 /* HV - assume 16MB kernel allocated htab */
1694 return 24;
1695 }
1696 }
1697
1698 static inline uint32_t mfpvr(void)
1699 {
1700 uint32_t pvr;
1701
1702 asm ("mfpvr %0"
1703 : "=r"(pvr));
1704 return pvr;
1705 }
1706
/* Set (@on true) or clear (@on false) the bits of @flags in *@word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
1715
1716 static void kvmppc_host_cpu_initfn(Object *obj)
1717 {
1718 assert(kvm_enabled());
1719 }
1720
1721 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1722 {
1723 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1724 uint32_t vmx = kvmppc_get_vmx();
1725 uint32_t dfp = kvmppc_get_dfp();
1726 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
1727 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
1728
1729 /* Now fix up the class with information we can query from the host */
1730
1731 if (vmx != -1) {
1732 /* Only override when we know what the host supports */
1733 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1734 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1735 }
1736 if (dfp != -1) {
1737 /* Only override when we know what the host supports */
1738 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1739 }
1740
1741 if (dcache_size != -1) {
1742 pcc->l1_dcache_size = dcache_size;
1743 }
1744
1745 if (icache_size != -1) {
1746 pcc->l1_icache_size = icache_size;
1747 }
1748 }
1749
1750 int kvmppc_fixup_cpu(PowerPCCPU *cpu)
1751 {
1752 CPUState *cs = CPU(cpu);
1753 int smt;
1754
1755 /* Adjust cpu index for SMT */
1756 smt = kvmppc_smt_threads();
1757 cs->cpu_index = (cs->cpu_index / smp_threads) * smt
1758 + (cs->cpu_index % smp_threads);
1759
1760 return 0;
1761 }
1762
1763 bool kvmppc_has_cap_epr(void)
1764 {
1765 return cap_epr;
1766 }
1767
1768 static int kvm_ppc_register_host_cpu_type(void)
1769 {
1770 TypeInfo type_info = {
1771 .name = TYPE_HOST_POWERPC_CPU,
1772 .instance_init = kvmppc_host_cpu_initfn,
1773 .class_init = kvmppc_host_cpu_class_init,
1774 };
1775 uint32_t host_pvr = mfpvr();
1776 PowerPCCPUClass *pvr_pcc;
1777
1778 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1779 if (pvr_pcc == NULL) {
1780 return -1;
1781 }
1782 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1783 type_register(&type_info);
1784 return 0;
1785 }
1786
1787
1788 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1789 {
1790 return true;
1791 }
1792
1793 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1794 {
1795 return 1;
1796 }
1797
/* SIGBUS hook: ignores its arguments and always returns 1. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    const int result = 1;

    return result;
}