1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
22
23 #include <linux/kvm.h>
24
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
36 #include "hw/sysbus.h"
37 #include "hw/spapr.h"
38 #include "hw/spapr_vio.h"
39
40 //#define DEBUG_KVM
41
42 #ifdef DEBUG_KVM
43 #define dprintf(fmt, ...) \
44 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
45 #else
46 #define dprintf(fmt, ...) \
47 do { } while (0)
48 #endif
49
50 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
51
52 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
53 KVM_CAP_LAST_INFO
54 };
55
56 static int cap_interrupt_unset = false;
57 static int cap_interrupt_level = false;
58 static int cap_segstate;
59 static int cap_booke_sregs;
60 static int cap_ppc_smt;
61 static int cap_ppc_rma;
62 static int cap_spapr_tce;
63 static int cap_hior;
64 static int cap_one_reg;
65
66 /* XXX We have a race condition where we actually have a level triggered
67 * interrupt, but the infrastructure can't expose that yet, so the guest
68 * takes but ignores it, goes to sleep and never gets notified that there's
69 * still an interrupt pending.
70 *
71 * As a quick workaround, let's just wake up again 20 ms after we injected
72  * an interrupt. That way we can ensure that we're always reinjecting
73 * interrupts in case the guest swallowed them.
74 */
75 static QEMUTimer *idle_timer;
76
77 static void kvm_kick_cpu(void *opaque)
78 {
79 PowerPCCPU *cpu = opaque;
80
81 qemu_cpu_kick(CPU(cpu));
82 }
83
84 static int kvm_ppc_register_host_cpu_type(void);
85
86 int kvm_arch_init(KVMState *s)
87 {
88 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
89 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
90 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
91 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
92 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
93 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
94 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
95 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
96 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
97
98 if (!cap_interrupt_level) {
99 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
100 "VM to stall at times!\n");
101 }
102
103 kvm_ppc_register_host_cpu_type();
104
105 return 0;
106 }
107
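/* Tell KVM which PVR the guest should see. BookE guests keep the host's
 * native PVR, so this is a no-op there; otherwise cap_segstate is required. */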
108 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
109 {
110 CPUPPCState *cenv = &cpu->env;
111 CPUState *cs = CPU(cpu);
112 struct kvm_sregs sregs;
113 int ret;
114
115 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
116 /* What we're really trying to say is "if we're on BookE, we use
117 the native PVR for now". This is the only sane way to check
118        it though, so we may mislead users into thinking they can run
119 BookE guests on BookS. Let's hope nobody dares enough :) */
120 return 0;
121 } else {
122 if (!cap_segstate) {
123 fprintf(stderr, "kvm error: missing PVR setting capability\n");
124 return -ENOSYS;
125 }
126 }
127
128 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
129 if (ret) {
130 return ret;
131 }
132
133 sregs.pvr = cenv->spr[SPR_PVR];
134 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
135 }
136
137 /* Set up a shared TLB array with KVM */
138 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
139 {
140 CPUPPCState *env = &cpu->env;
141 CPUState *cs = CPU(cpu);
142 struct kvm_book3e_206_tlb_params params = {};
143 struct kvm_config_tlb cfg = {};
144 struct kvm_enable_cap encap = {};
145 unsigned int entries = 0;
146 int ret, i;
147
148 if (!kvm_enabled() ||
149 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
150 return 0;
151 }
152
153 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
154
155 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
156 params.tlb_sizes[i] = booke206_tlb_size(env, i);
157 params.tlb_ways[i] = booke206_tlb_ways(env, i);
158 entries += params.tlb_sizes[i];
159 }
160
161 assert(entries == env->nb_tlb);
162 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
163
164 env->tlb_dirty = true;
165
166 cfg.array = (uintptr_t)env->tlb.tlbm;
167 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
168 cfg.params = (uintptr_t)&params;
169 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
170
171 encap.cap = KVM_CAP_SW_TLB;
172 encap.args[0] = (uintptr_t)&cfg;
173
174 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
175 if (ret < 0) {
176 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
177 __func__, strerror(-ret));
178 return ret;
179 }
180
181 env->kvm_sw_tlb = true;
182 return 0;
183 }
184
185
186 #if defined(TARGET_PPC64)
187 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
188 struct kvm_ppc_smmu_info *info)
189 {
190 CPUPPCState *env = &cpu->env;
191 CPUState *cs = CPU(cpu);
192
193 memset(info, 0, sizeof(*info));
194
195 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
196 * need to "guess" what the supported page sizes are.
197 *
198 * For that to work we make a few assumptions:
199 *
200 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
201 * KVM which only supports 4K and 16M pages, but supports them
202      *   regardless of the backing store characteristics. We also don't
203 * support 1T segments.
204 *
205      *   This is safe because if HV KVM ever supports that capability or PR
206      *   KVM grows support for more page/segment sizes, those versions
207      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
208      *   will not hit this fallback.
209 *
210 * - Else we are running HV KVM. This means we only support page
211 * sizes that fit in the backing store. Additionally we only
212      *   advertise 64K pages if the processor is ARCH 2.06 and we assume
213 * P7 encodings for the SLB and hash table. Here too, we assume
214 * support for any newer processor will mean a kernel that
215 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
216 * this fallback.
217 */
218 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
219 /* No flags */
220 info->flags = 0;
221 info->slb_size = 64;
222
223 /* Standard 4k base page size segment */
224 info->sps[0].page_shift = 12;
225 info->sps[0].slb_enc = 0;
226 info->sps[0].enc[0].page_shift = 12;
227 info->sps[0].enc[0].pte_enc = 0;
228
229 /* Standard 16M large page size segment */
230 info->sps[1].page_shift = 24;
231 info->sps[1].slb_enc = SLB_VSID_L;
232 info->sps[1].enc[0].page_shift = 24;
233 info->sps[1].enc[0].pte_enc = 0;
234 } else {
235 int i = 0;
236
237 /* HV KVM has backing store size restrictions */
238 info->flags = KVM_PPC_PAGE_SIZES_REAL;
239
240 if (env->mmu_model & POWERPC_MMU_1TSEG) {
241 info->flags |= KVM_PPC_1T_SEGMENTS;
242 }
243
244 if (env->mmu_model == POWERPC_MMU_2_06) {
245 info->slb_size = 32;
246 } else {
247 info->slb_size = 64;
248 }
249
250 /* Standard 4k base page size segment */
251 info->sps[i].page_shift = 12;
252 info->sps[i].slb_enc = 0;
253 info->sps[i].enc[0].page_shift = 12;
254 info->sps[i].enc[0].pte_enc = 0;
255 i++;
256
257 /* 64K on MMU 2.06 */
258 if (env->mmu_model == POWERPC_MMU_2_06) {
259 info->sps[i].page_shift = 16;
260 info->sps[i].slb_enc = 0x110;
261 info->sps[i].enc[0].page_shift = 16;
262 info->sps[i].enc[0].pte_enc = 1;
263 i++;
264 }
265
266 /* Standard 16M large page size segment */
267 info->sps[i].page_shift = 24;
268 info->sps[i].slb_enc = SLB_VSID_L;
269 info->sps[i].enc[0].page_shift = 24;
270 info->sps[i].enc[0].pte_enc = 0;
271 }
272 }
273
274 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
275 {
276 CPUState *cs = CPU(cpu);
277 int ret;
278
279 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
280 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
281 if (ret == 0) {
282 return;
283 }
284 }
285
286 kvm_get_fallback_smmu_info(cpu, info);
287 }
288
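/* Page size backing guest RAM: the hugetlbfs page size if -mem-path points
 * at a hugetlbfs mount, otherwise the normal host page size. */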
289 static long getrampagesize(void)
290 {
291 struct statfs fs;
292 int ret;
293
294 if (!mem_path) {
295 /* guest RAM is backed by normal anonymous pages */
296 return getpagesize();
297 }
298
299 do {
300 ret = statfs(mem_path, &fs);
301 } while (ret != 0 && errno == EINTR);
302
303 if (ret != 0) {
304 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
305 strerror(errno));
306 exit(1);
307 }
308
309 #define HUGETLBFS_MAGIC 0x958458f6
310
311 if (fs.f_type != HUGETLBFS_MAGIC) {
312 /* Explicit mempath, but it's ordinary pages */
313 return getpagesize();
314 }
315
316     /* It's a hugepage mount, so return the huge page size */
317 return fs.f_bsize;
318 }
319
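/* With KVM_PPC_PAGE_SIZES_REAL, a page size is only usable if it does not
 * exceed the page size backing guest RAM. */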
320 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
321 {
322 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
323 return true;
324 }
325
326 return (1ul << shift) <= rampgsize;
327 }
328
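/* Filter the host's supported segment/page sizes against the RAM backing
 * page size and store the result in the CPU state. */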
329 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
330 {
331 static struct kvm_ppc_smmu_info smmu_info;
332 static bool has_smmu_info;
333 CPUPPCState *env = &cpu->env;
334 long rampagesize;
335 int iq, ik, jq, jk;
336
337 /* We only handle page sizes for 64-bit server guests for now */
338 if (!(env->mmu_model & POWERPC_MMU_64)) {
339 return;
340 }
341
342 /* Collect MMU info from kernel if not already */
343 if (!has_smmu_info) {
344 kvm_get_smmu_info(cpu, &smmu_info);
345 has_smmu_info = true;
346 }
347
348 rampagesize = getrampagesize();
349
350 /* Convert to QEMU form */
351 memset(&env->sps, 0, sizeof(env->sps));
352
353 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
354 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
355 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
356
357 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
358 ksps->page_shift)) {
359 continue;
360 }
361 qsps->page_shift = ksps->page_shift;
362 qsps->slb_enc = ksps->slb_enc;
363 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
364 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
365 ksps->enc[jk].page_shift)) {
366 continue;
367 }
368 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
369 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
370 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
371 break;
372 }
373 }
374 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
375 break;
376 }
377 }
378 env->slb_nr = smmu_info.slb_size;
379 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
380 env->mmu_model |= POWERPC_MMU_1TSEG;
381 } else {
382 env->mmu_model &= ~POWERPC_MMU_1TSEG;
383 }
384 }
385 #else /* defined (TARGET_PPC64) */
386
387 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
388 {
389 }
390
391 #endif /* !defined (TARGET_PPC64) */
392
393 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
394 {
395 return cpu->cpu_index;
396 }
397
398 int kvm_arch_init_vcpu(CPUState *cs)
399 {
400 PowerPCCPU *cpu = POWERPC_CPU(cs);
401 CPUPPCState *cenv = &cpu->env;
402 int ret;
403
404 /* Gather server mmu info from KVM and update the CPU state */
405 kvm_fixup_page_sizes(cpu);
406
407 /* Synchronize sregs with kvm */
408 ret = kvm_arch_sync_sregs(cpu);
409 if (ret) {
410 return ret;
411 }
412
413 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);
414
415 /* Some targets support access to KVM's guest TLB. */
416 switch (cenv->mmu_model) {
417 case POWERPC_MMU_BOOKE206:
418 ret = kvm_booke206_tlb_init(cpu);
419 break;
420 default:
421 break;
422 }
423
424 return ret;
425 }
426
427 void kvm_arch_reset_vcpu(CPUState *cpu)
428 {
429 }
430
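/* Write the complete shadow TLB back to KVM by marking every entry dirty. */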
431 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
432 {
433 CPUPPCState *env = &cpu->env;
434 CPUState *cs = CPU(cpu);
435 struct kvm_dirty_tlb dirty_tlb;
436 unsigned char *bitmap;
437 int ret;
438
439 if (!env->kvm_sw_tlb) {
440 return;
441 }
442
443 bitmap = g_malloc((env->nb_tlb + 7) / 8);
444 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
445
446 dirty_tlb.bitmap = (uintptr_t)bitmap;
447 dirty_tlb.num_dirty = env->nb_tlb;
448
449 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
450 if (ret) {
451 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
452 __func__, strerror(-ret));
453 }
454
455 g_free(bitmap);
456 }
457
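/* Read one SPR from KVM via the ONE_REG interface into env->spr[]. */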
458 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
459 {
460 PowerPCCPU *cpu = POWERPC_CPU(cs);
461 CPUPPCState *env = &cpu->env;
462 union {
463 uint32_t u32;
464 uint64_t u64;
465 } val;
466 struct kvm_one_reg reg = {
467 .id = id,
468 .addr = (uintptr_t) &val,
469 };
470 int ret;
471
472 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
473 if (ret != 0) {
474 fprintf(stderr, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
475 spr, strerror(errno));
476 } else {
477 switch (id & KVM_REG_SIZE_MASK) {
478 case KVM_REG_SIZE_U32:
479 env->spr[spr] = val.u32;
480 break;
481
482 case KVM_REG_SIZE_U64:
483 env->spr[spr] = val.u64;
484 break;
485
486 default:
487 /* Don't handle this size yet */
488 abort();
489 }
490 }
491 }
492
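/* Write one SPR from env->spr[] to KVM via the ONE_REG interface. */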
493 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
494 {
495 PowerPCCPU *cpu = POWERPC_CPU(cs);
496 CPUPPCState *env = &cpu->env;
497 union {
498 uint32_t u32;
499 uint64_t u64;
500 } val;
501 struct kvm_one_reg reg = {
502 .id = id,
503 .addr = (uintptr_t) &val,
504 };
505 int ret;
506
507 switch (id & KVM_REG_SIZE_MASK) {
508 case KVM_REG_SIZE_U32:
509 val.u32 = env->spr[spr];
510 break;
511
512 case KVM_REG_SIZE_U64:
513 val.u64 = env->spr[spr];
514 break;
515
516 default:
517 /* Don't handle this size yet */
518 abort();
519 }
520
521 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
522 if (ret != 0) {
523 fprintf(stderr, "Warning: Unable to set SPR %d to KVM: %s\n",
524 spr, strerror(errno));
525 }
526 }
527
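/* Push FPSCR and the FP/VSX register file, plus Altivec state if present,
 * to KVM via ONE_REG. */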
528 static int kvm_put_fp(CPUState *cs)
529 {
530 PowerPCCPU *cpu = POWERPC_CPU(cs);
531 CPUPPCState *env = &cpu->env;
532 struct kvm_one_reg reg;
533 int i;
534 int ret;
535
536 if (env->insns_flags & PPC_FLOAT) {
537 uint64_t fpscr = env->fpscr;
538 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
539
540 reg.id = KVM_REG_PPC_FPSCR;
541 reg.addr = (uintptr_t)&fpscr;
542 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
543 if (ret < 0) {
544 dprintf("Unable to set FPSCR to KVM: %s\n", strerror(errno));
545 return ret;
546 }
547
548 for (i = 0; i < 32; i++) {
549 uint64_t vsr[2];
550
551 vsr[0] = float64_val(env->fpr[i]);
552 vsr[1] = env->vsr[i];
553 reg.addr = (uintptr_t) &vsr;
554 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
555
556 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
557 if (ret < 0) {
558 dprintf("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
559 i, strerror(errno));
560 return ret;
561 }
562 }
563 }
564
565 if (env->insns_flags & PPC_ALTIVEC) {
566 reg.id = KVM_REG_PPC_VSCR;
567 reg.addr = (uintptr_t)&env->vscr;
568 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
569 if (ret < 0) {
570 dprintf("Unable to set VSCR to KVM: %s\n", strerror(errno));
571 return ret;
572 }
573
574 for (i = 0; i < 32; i++) {
575 reg.id = KVM_REG_PPC_VR(i);
576 reg.addr = (uintptr_t)&env->avr[i];
577 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
578 if (ret < 0) {
579 dprintf("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
580 return ret;
581 }
582 }
583 }
584
585 return 0;
586 }
587
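/* Fetch FPSCR, FP/VSX and Altivec state from KVM via ONE_REG. */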
588 static int kvm_get_fp(CPUState *cs)
589 {
590 PowerPCCPU *cpu = POWERPC_CPU(cs);
591 CPUPPCState *env = &cpu->env;
592 struct kvm_one_reg reg;
593 int i;
594 int ret;
595
596 if (env->insns_flags & PPC_FLOAT) {
597 uint64_t fpscr;
598 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
599
600 reg.id = KVM_REG_PPC_FPSCR;
601 reg.addr = (uintptr_t)&fpscr;
602 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
603 if (ret < 0) {
604 dprintf("Unable to get FPSCR from KVM: %s\n", strerror(errno));
605 return ret;
606 } else {
607 env->fpscr = fpscr;
608 }
609
610 for (i = 0; i < 32; i++) {
611 uint64_t vsr[2];
612
613 reg.addr = (uintptr_t) &vsr;
614 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
615
616 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
617 if (ret < 0) {
618 dprintf("Unable to get %s%d from KVM: %s\n",
619 vsx ? "VSR" : "FPR", i, strerror(errno));
620 return ret;
621 } else {
622 env->fpr[i] = vsr[0];
623 if (vsx) {
624 env->vsr[i] = vsr[1];
625 }
626 }
627 }
628 }
629
630 if (env->insns_flags & PPC_ALTIVEC) {
631 reg.id = KVM_REG_PPC_VSCR;
632 reg.addr = (uintptr_t)&env->vscr;
633 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
634 if (ret < 0) {
635 dprintf("Unable to get VSCR from KVM: %s\n", strerror(errno));
636 return ret;
637 }
638
639 for (i = 0; i < 32; i++) {
640 reg.id = KVM_REG_PPC_VR(i);
641 reg.addr = (uintptr_t)&env->avr[i];
642 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
643 if (ret < 0) {
644 dprintf("Unable to get VR%d from KVM: %s\n",
645 i, strerror(errno));
646 return ret;
647 }
648 }
649 }
650
651 return 0;
652 }
653
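/* Copy the QEMU CPU state (GPRs, SPRs, FP/VMX state, SLB/SR/BATs) into KVM. */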
654 int kvm_arch_put_registers(CPUState *cs, int level)
655 {
656 PowerPCCPU *cpu = POWERPC_CPU(cs);
657 CPUPPCState *env = &cpu->env;
658 struct kvm_regs regs;
659 int ret;
660 int i;
661
662 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
663 if (ret < 0) {
664 return ret;
665 }
666
667 regs.ctr = env->ctr;
668 regs.lr = env->lr;
669 regs.xer = cpu_read_xer(env);
670 regs.msr = env->msr;
671 regs.pc = env->nip;
672
673 regs.srr0 = env->spr[SPR_SRR0];
674 regs.srr1 = env->spr[SPR_SRR1];
675
676 regs.sprg0 = env->spr[SPR_SPRG0];
677 regs.sprg1 = env->spr[SPR_SPRG1];
678 regs.sprg2 = env->spr[SPR_SPRG2];
679 regs.sprg3 = env->spr[SPR_SPRG3];
680 regs.sprg4 = env->spr[SPR_SPRG4];
681 regs.sprg5 = env->spr[SPR_SPRG5];
682 regs.sprg6 = env->spr[SPR_SPRG6];
683 regs.sprg7 = env->spr[SPR_SPRG7];
684
685 regs.pid = env->spr[SPR_BOOKE_PID];
686
687 for (i = 0;i < 32; i++)
688 regs.gpr[i] = env->gpr[i];
689
690 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
691 if (ret < 0)
692 return ret;
693
694 kvm_put_fp(cs);
695
696 if (env->tlb_dirty) {
697 kvm_sw_tlb_put(cpu);
698 env->tlb_dirty = false;
699 }
700
701 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
702 struct kvm_sregs sregs;
703
704 sregs.pvr = env->spr[SPR_PVR];
705
706 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
707
708 /* Sync SLB */
709 #ifdef TARGET_PPC64
710 for (i = 0; i < 64; i++) {
711 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
712 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
713 }
714 #endif
715
716 /* Sync SRs */
717 for (i = 0; i < 16; i++) {
718 sregs.u.s.ppc32.sr[i] = env->sr[i];
719 }
720
721 /* Sync BATs */
722 for (i = 0; i < 8; i++) {
723 /* Beware. We have to swap upper and lower bits here */
724 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
725 | env->DBAT[1][i];
726 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
727 | env->IBAT[1][i];
728 }
729
730 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
731 if (ret) {
732 return ret;
733 }
734 }
735
736 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
737 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
738 }
739
740 if (cap_one_reg) {
741 int i;
742
743         /* We deliberately ignore errors here: for kernels which have
744          * the ONE_REG calls but don't support the specific
745          * registers, there's a reasonable chance things will still
746          * work, at least until we try to migrate. */
747 for (i = 0; i < 1024; i++) {
748 uint64_t id = env->spr_cb[i].one_reg_id;
749
750 if (id != 0) {
751 kvm_put_one_spr(cs, id, i);
752 }
753 }
754 }
755
756 return ret;
757 }
758
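/* Fetch the vcpu state from KVM back into the QEMU CPU state, including
 * BookE and book3s sregs when the corresponding capabilities are present. */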
759 int kvm_arch_get_registers(CPUState *cs)
760 {
761 PowerPCCPU *cpu = POWERPC_CPU(cs);
762 CPUPPCState *env = &cpu->env;
763 struct kvm_regs regs;
764 struct kvm_sregs sregs;
765 uint32_t cr;
766 int i, ret;
767
768 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
769 if (ret < 0)
770 return ret;
771
772 cr = regs.cr;
773 for (i = 7; i >= 0; i--) {
774 env->crf[i] = cr & 15;
775 cr >>= 4;
776 }
777
778 env->ctr = regs.ctr;
779 env->lr = regs.lr;
780 cpu_write_xer(env, regs.xer);
781 env->msr = regs.msr;
782 env->nip = regs.pc;
783
784 env->spr[SPR_SRR0] = regs.srr0;
785 env->spr[SPR_SRR1] = regs.srr1;
786
787 env->spr[SPR_SPRG0] = regs.sprg0;
788 env->spr[SPR_SPRG1] = regs.sprg1;
789 env->spr[SPR_SPRG2] = regs.sprg2;
790 env->spr[SPR_SPRG3] = regs.sprg3;
791 env->spr[SPR_SPRG4] = regs.sprg4;
792 env->spr[SPR_SPRG5] = regs.sprg5;
793 env->spr[SPR_SPRG6] = regs.sprg6;
794 env->spr[SPR_SPRG7] = regs.sprg7;
795
796 env->spr[SPR_BOOKE_PID] = regs.pid;
797
798 for (i = 0;i < 32; i++)
799 env->gpr[i] = regs.gpr[i];
800
801 kvm_get_fp(cs);
802
803 if (cap_booke_sregs) {
804 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
805 if (ret < 0) {
806 return ret;
807 }
808
809 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
810 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
811 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
812 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
813 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
814 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
815 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
816 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
817 env->spr[SPR_DECR] = sregs.u.e.dec;
818 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
819 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
820 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
821 }
822
823 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
824 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
825 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
826 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
827 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
828 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
829 }
830
831 if (sregs.u.e.features & KVM_SREGS_E_64) {
832 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
833 }
834
835 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
836 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
837 }
838
839 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
840 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
841 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
842 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
843 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
844 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
845 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
846 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
847 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
848 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
849 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
850 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
851 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
852 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
853 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
854 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
855 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
856
857 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
858 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
859 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
860 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
861 }
862
863 if (sregs.u.e.features & KVM_SREGS_E_PM) {
864 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
865 }
866
867 if (sregs.u.e.features & KVM_SREGS_E_PC) {
868 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
869 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
870 }
871 }
872
873 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
874 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
875 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
876 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
877 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
878 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
879 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
880 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
881 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
882 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
883 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
884 }
885
886 if (sregs.u.e.features & KVM_SREGS_EXP) {
887 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
888 }
889
890 if (sregs.u.e.features & KVM_SREGS_E_PD) {
891 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
892 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
893 }
894
895 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
896 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
897 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
898 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
899
900 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
901 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
902 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
903 }
904 }
905 }
906
907 if (cap_segstate) {
908 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
909 if (ret < 0) {
910 return ret;
911 }
912
913 ppc_store_sdr1(env, sregs.u.s.sdr1);
914
915 /* Sync SLB */
916 #ifdef TARGET_PPC64
917 for (i = 0; i < 64; i++) {
918 ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
919 sregs.u.s.ppc64.slb[i].slbv);
920 }
921 #endif
922
923 /* Sync SRs */
924 for (i = 0; i < 16; i++) {
925 env->sr[i] = sregs.u.s.ppc32.sr[i];
926 }
927
928 /* Sync BATs */
929 for (i = 0; i < 8; i++) {
930 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
931 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
932 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
933 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
934 }
935 }
936
937 if (cap_hior) {
938 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
939 }
940
941 if (cap_one_reg) {
942 int i;
943
944         /* We deliberately ignore errors here: for kernels which have
945          * the ONE_REG calls but don't support the specific
946          * registers, there's a reasonable chance things will still
947          * work, at least until we try to migrate. */
948 for (i = 0; i < 1024; i++) {
949 uint64_t id = env->spr_cb[i].one_reg_id;
950
951 if (id != 0) {
952 kvm_get_one_spr(cs, id, i);
953 }
954 }
955 }
956
957 return 0;
958 }
959
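/* Raise or lower the external interrupt line using KVM's level-triggered
 * interrupt interface; other interrupt sources are ignored here. */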
960 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
961 {
962 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
963
964 if (irq != PPC_INTERRUPT_EXT) {
965 return 0;
966 }
967
968 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
969 return 0;
970 }
971
972 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
973
974 return 0;
975 }
976
977 #if defined(TARGET_PPCEMB)
978 #define PPC_INPUT_INT PPC40x_INPUT_INT
979 #elif defined(TARGET_PPC64)
980 #define PPC_INPUT_INT PPC970_INPUT_INT
981 #else
982 #define PPC_INPUT_INT PPC6xx_INPUT_INT
983 #endif
984
985 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
986 {
987 PowerPCCPU *cpu = POWERPC_CPU(cs);
988 CPUPPCState *env = &cpu->env;
989 int r;
990 unsigned irq;
991
992 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
993 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
994 if (!cap_interrupt_level &&
995 run->ready_for_interrupt_injection &&
996 (env->interrupt_request & CPU_INTERRUPT_HARD) &&
997 (env->irq_input_state & (1<<PPC_INPUT_INT)))
998 {
999 /* For now KVM disregards the 'irq' argument. However, in the
1000 * future KVM could cache it in-kernel to avoid a heavyweight exit
1001 * when reading the UIC.
1002 */
1003 irq = KVM_INTERRUPT_SET;
1004
1005 dprintf("injected interrupt %d\n", irq);
1006 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1007 if (r < 0) {
1008 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1009 }
1010
1011 /* Always wake up soon in case the interrupt was level based */
1012 qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
1013 (get_ticks_per_sec() / 50));
1014 }
1015
1016 /* We don't know if there are more interrupts pending after this. However,
1017 * the guest will return to userspace in the course of handling this one
1018      * anyway, so we will get a chance to deliver the rest. */
1019 }
1020
1021 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1022 {
1023 }
1024
1025 int kvm_arch_process_async_events(CPUState *cs)
1026 {
1027 PowerPCCPU *cpu = POWERPC_CPU(cs);
1028 return cpu->env.halted;
1029 }
1030
1031 static int kvmppc_handle_halt(CPUPPCState *env)
1032 {
1033 if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1034 env->halted = 1;
1035 env->exception_index = EXCP_HLT;
1036 }
1037
1038 return 0;
1039 }
1040
1041 /* Map DCR accesses to the existing QEMU DCR emulation */
1042 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1043 {
1044 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1045 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1046
1047 return 0;
1048 }
1049
1050 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1051 {
1052 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1053 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1054
1055 return 0;
1056 }
1057
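/* Dispatch KVM exits that need PPC-specific handling: DCR accesses, halt,
 * PAPR hypercalls and EPR reads. */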
1058 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1059 {
1060 PowerPCCPU *cpu = POWERPC_CPU(cs);
1061 CPUPPCState *env = &cpu->env;
1062 int ret;
1063
1064 switch (run->exit_reason) {
1065 case KVM_EXIT_DCR:
1066 if (run->dcr.is_write) {
1067 dprintf("handle dcr write\n");
1068 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1069 } else {
1070 dprintf("handle dcr read\n");
1071 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1072 }
1073 break;
1074 case KVM_EXIT_HLT:
1075 dprintf("handle halt\n");
1076 ret = kvmppc_handle_halt(env);
1077 break;
1078 #ifdef CONFIG_PSERIES
1079 case KVM_EXIT_PAPR_HCALL:
1080 dprintf("handle PAPR hypercall\n");
1081 run->papr_hcall.ret = spapr_hypercall(cpu,
1082 run->papr_hcall.nr,
1083 run->papr_hcall.args);
1084 ret = 0;
1085 break;
1086 #endif
1087 case KVM_EXIT_EPR:
1088 dprintf("handle epr\n");
1089 run->epr.epr = ldl_phys(env->mpic_iack);
1090 ret = 0;
1091 break;
1092 default:
1093 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1094 ret = -1;
1095 break;
1096 }
1097
1098 return ret;
1099 }
1100
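/* Copy the first /proc/cpuinfo line starting with 'field' into 'value'. */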
1101 static int read_cpuinfo(const char *field, char *value, int len)
1102 {
1103 FILE *f;
1104 int ret = -1;
1105 int field_len = strlen(field);
1106 char line[512];
1107
1108 f = fopen("/proc/cpuinfo", "r");
1109 if (!f) {
1110 return -1;
1111 }
1112
1113 do {
1114         if (!fgets(line, sizeof(line), f)) {
1115 break;
1116 }
1117 if (!strncmp(line, field, field_len)) {
1118 pstrcpy(value, len, line);
1119 ret = 0;
1120 break;
1121 }
1122     } while (*line);
1123
1124 fclose(f);
1125
1126 return ret;
1127 }
1128
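/* Host timebase frequency, parsed from the "timebase" line in /proc/cpuinfo;
 * falls back to the emulated tick rate if it can't be determined. */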
1129 uint32_t kvmppc_get_tbfreq(void)
1130 {
1131 char line[512];
1132 char *ns;
1133 uint32_t retval = get_ticks_per_sec();
1134
1135 if (read_cpuinfo("timebase", line, sizeof(line))) {
1136 return retval;
1137 }
1138
1139 if (!(ns = strchr(line, ':'))) {
1140 return retval;
1141 }
1142
1143 ns++;
1144
1145 retval = atoi(ns);
1146 return retval;
1147 }
1148
1149 /* Try to find a device tree node for a CPU with a clock-frequency property */
1150 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1151 {
1152 struct dirent *dirp;
1153 DIR *dp;
1154
1155 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1156 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1157 return -1;
1158 }
1159
1160 buf[0] = '\0';
1161 while ((dirp = readdir(dp)) != NULL) {
1162 FILE *f;
1163 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1164 dirp->d_name);
1165 f = fopen(buf, "r");
1166 if (f) {
1167 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1168 fclose(f);
1169 break;
1170 }
1171 buf[0] = '\0';
1172 }
1173 closedir(dp);
1174 if (buf[0] == '\0') {
1175 printf("Unknown host!\n");
1176 return -1;
1177 }
1178
1179 return 0;
1180 }
1181
1182 /* Read a CPU node property from the host device tree that's a single
1183  * integer (32-bit or 64-bit). Returns -1 if the property can't be
1184  * found or opened, and 0 if the format isn't
1185  * understood. */
1186 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1187 {
1188 char buf[PATH_MAX];
1189 union {
1190 uint32_t v32;
1191 uint64_t v64;
1192 } u;
1193 FILE *f;
1194 int len;
1195
1196 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1197 return -1;
1198 }
1199
1200 strncat(buf, "/", sizeof(buf) - strlen(buf));
1201 strncat(buf, propname, sizeof(buf) - strlen(buf));
1202
1203 f = fopen(buf, "rb");
1204 if (!f) {
1205 return -1;
1206 }
1207
1208 len = fread(&u, 1, sizeof(u), f);
1209 fclose(f);
1210 switch (len) {
1211 case 4:
1212 /* property is a 32-bit quantity */
1213 return be32_to_cpu(u.v32);
1214 case 8:
1215 return be64_to_cpu(u.v64);
1216 }
1217
1218 return 0;
1219 }
1220
1221 uint64_t kvmppc_get_clockfreq(void)
1222 {
1223 return kvmppc_read_int_cpu_dt("clock-frequency");
1224 }
1225
1226 uint32_t kvmppc_get_vmx(void)
1227 {
1228 return kvmppc_read_int_cpu_dt("ibm,vmx");
1229 }
1230
1231 uint32_t kvmppc_get_dfp(void)
1232 {
1233 return kvmppc_read_int_cpu_dt("ibm,dfp");
1234 }
1235
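/* Query KVM's paravirt info (hypercall sequence, idle support); returns 0 on
 * success, 1 if the capability or ioctl is unavailable. */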
1236 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1237 {
1238 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1239 CPUState *cs = CPU(cpu);
1240
1241 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1242 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1243 return 0;
1244 }
1245
1246 return 1;
1247 }
1248
1249 int kvmppc_get_hasidle(CPUPPCState *env)
1250 {
1251 struct kvm_ppc_pvinfo pvinfo;
1252
1253 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1254 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1255 return 1;
1256 }
1257
1258 return 0;
1259 }
1260
1261 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1262 {
1263 uint32_t *hc = (uint32_t*)buf;
1264 struct kvm_ppc_pvinfo pvinfo;
1265
1266 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1267 memcpy(buf, pvinfo.hcall, buf_len);
1268 return 0;
1269 }
1270
1271 /*
1272 * Fallback to always fail hypercalls:
1273 *
1274 * li r3, -1
1275 * nop
1276 * nop
1277 * nop
1278 */
1279
1280 hc[0] = 0x3860ffff;
1281 hc[1] = 0x60000000;
1282 hc[2] = 0x60000000;
1283 hc[3] = 0x60000000;
1284
1285 return 0;
1286 }
1287
1288 void kvmppc_set_papr(PowerPCCPU *cpu)
1289 {
1290 CPUPPCState *env = &cpu->env;
1291 CPUState *cs = CPU(cpu);
1292 struct kvm_enable_cap cap = {};
1293 int ret;
1294
1295 cap.cap = KVM_CAP_PPC_PAPR;
1296 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1297
1298 if (ret) {
1299 cpu_abort(env, "This KVM version does not support PAPR\n");
1300 }
1301 }
1302
1303 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1304 {
1305 CPUPPCState *env = &cpu->env;
1306 CPUState *cs = CPU(cpu);
1307 struct kvm_enable_cap cap = {};
1308 int ret;
1309
1310 cap.cap = KVM_CAP_PPC_EPR;
1311 cap.args[0] = mpic_proxy;
1312 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1313
1314 if (ret && mpic_proxy) {
1315 cpu_abort(env, "This KVM version does not support EPR\n");
1316 }
1317 }
1318
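/* Number of SMT threads per core supported by the host kernel, or 1 if the
 * capability is not reported. */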
1319 int kvmppc_smt_threads(void)
1320 {
1321 return cap_ppc_smt ? cap_ppc_smt : 1;
1322 }
1323
1324 #ifdef TARGET_PPC64
1325 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1326 {
1327 void *rma;
1328 off_t size;
1329 int fd;
1330 struct kvm_allocate_rma ret;
1331 MemoryRegion *rma_region;
1332
1333 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1334 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1335 * not necessary on this hardware
1336 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1337 *
1338 * FIXME: We should allow the user to force contiguous RMA
1339 * allocation in the cap_ppc_rma==1 case.
1340 */
1341 if (cap_ppc_rma < 2) {
1342 return 0;
1343 }
1344
1345 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1346 if (fd < 0) {
1347 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1348 strerror(errno));
1349 return -1;
1350 }
1351
1352 size = MIN(ret.rma_size, 256ul << 20);
1353
1354 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1355 if (rma == MAP_FAILED) {
1356 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1357 return -1;
1358     }
1359
1360 rma_region = g_new(MemoryRegion, 1);
1361 memory_region_init_ram_ptr(rma_region, name, size, rma);
1362 vmstate_register_ram_global(rma_region);
1363 memory_region_add_subregion(sysmem, 0, rma_region);
1364
1365 return size;
1366 }
1367
1368 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1369 {
1370 if (cap_ppc_rma >= 2) {
1371 return current_size;
1372 }
1373 return MIN(current_size,
1374 getrampagesize() << (hash_shift - 7));
1375 }
1376 #endif
1377
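/* Create an in-kernel TCE table for the given LIOBN and mmap() it so QEMU
 * can access it directly; returns NULL if the capability is missing. */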
1378 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1379 {
1380 struct kvm_create_spapr_tce args = {
1381 .liobn = liobn,
1382 .window_size = window_size,
1383 };
1384 long len;
1385 int fd;
1386 void *table;
1387
1388 /* Must set fd to -1 so we don't try to munmap when called for
1389 * destroying the table, which the upper layers -will- do
1390 */
1391 *pfd = -1;
1392 if (!cap_spapr_tce) {
1393 return NULL;
1394 }
1395
1396 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1397 if (fd < 0) {
1398 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1399 liobn);
1400 return NULL;
1401 }
1402
1403 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1404 /* FIXME: round this up to page size */
1405
1406 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1407 if (table == MAP_FAILED) {
1408 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1409 liobn);
1410 close(fd);
1411 return NULL;
1412 }
1413
1414 *pfd = fd;
1415 return table;
1416 }
1417
1418 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1419 {
1420 long len;
1421
1422 if (fd < 0) {
1423 return -1;
1424 }
1425
1426 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
1427 if ((munmap(table, len) < 0) ||
1428 (close(fd) < 0)) {
1429 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1430 strerror(errno));
1431 /* Leak the table */
1432 }
1433
1434 return 0;
1435 }
1436
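/* Ask the kernel to allocate or reset the hash page table. Returns the shift
 * of the kernel-allocated HTAB, 0 if QEMU must allocate it itself, or a
 * negative error code. */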
1437 int kvmppc_reset_htab(int shift_hint)
1438 {
1439 uint32_t shift = shift_hint;
1440
1441 if (!kvm_enabled()) {
1442 /* Full emulation, tell caller to allocate htab itself */
1443 return 0;
1444 }
1445 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1446 int ret;
1447 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1448 if (ret == -ENOTTY) {
1449 /* At least some versions of PR KVM advertise the
1450 * capability, but don't implement the ioctl(). Oops.
1451 * Return 0 so that we allocate the htab in qemu, as is
1452 * correct for PR. */
1453 return 0;
1454 } else if (ret < 0) {
1455 return ret;
1456 }
1457 return shift;
1458 }
1459
1460     /* We have a kernel that predates the htab reset calls.  For PR
1461      * KVM, we need to allocate the htab ourselves; an HV KVM of
1462      * this era has already allocated a 16MB fixed-size hash
1463      * table.  Kernels of this era have the GET_PVINFO capability
1464      * only on PR, so we use this hack to determine the right
1465      * answer. */
1466 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1467 /* PR - tell caller to allocate htab */
1468 return 0;
1469 } else {
1470 /* HV - assume 16MB kernel allocated htab */
1471 return 24;
1472 }
1473 }
1474
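/* Read the host's processor version register. */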
1475 static inline uint32_t mfpvr(void)
1476 {
1477 uint32_t pvr;
1478
1479 asm ("mfpvr %0"
1480 : "=r"(pvr));
1481 return pvr;
1482 }
1483
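/* Set or clear feature flags in an instruction-set flag word. */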
1484 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
1485 {
1486 if (on) {
1487 *word |= flags;
1488 } else {
1489 *word &= ~flags;
1490 }
1491 }
1492
1493 static void kvmppc_host_cpu_initfn(Object *obj)
1494 {
1495 assert(kvm_enabled());
1496 }
1497
1498 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1499 {
1500 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1501 uint32_t vmx = kvmppc_get_vmx();
1502 uint32_t dfp = kvmppc_get_dfp();
1503
1504 /* Now fix up the class with information we can query from the host */
1505
1506 if (vmx != -1) {
1507 /* Only override when we know what the host supports */
1508 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1509 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1510 }
1511 if (dfp != -1) {
1512 /* Only override when we know what the host supports */
1513 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1514 }
1515 }
1516
1517 int kvmppc_fixup_cpu(PowerPCCPU *cpu)
1518 {
1519 CPUState *cs = CPU(cpu);
1520 int smt;
1521
1522 /* Adjust cpu index for SMT */
1523 smt = kvmppc_smt_threads();
1524 cs->cpu_index = (cs->cpu_index / smp_threads) * smt
1525 + (cs->cpu_index % smp_threads);
1526
1527 return 0;
1528 }
1529
1530 static int kvm_ppc_register_host_cpu_type(void)
1531 {
1532 TypeInfo type_info = {
1533 .name = TYPE_HOST_POWERPC_CPU,
1534 .instance_init = kvmppc_host_cpu_initfn,
1535 .class_init = kvmppc_host_cpu_class_init,
1536 };
1537 uint32_t host_pvr = mfpvr();
1538 PowerPCCPUClass *pvr_pcc;
1539
1540 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1541 if (pvr_pcc == NULL) {
1542 return -1;
1543 }
1544 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1545 type_register(&type_info);
1546 return 0;
1547 }
1548
1549
1550 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1551 {
1552 return true;
1553 }
1554
1555 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1556 {
1557 return 1;
1558 }
1559
1560 int kvm_arch_on_sigbus(int code, void *addr)
1561 {
1562 return 1;
1563 }