1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
22
23 #include <linux/kvm.h>
24
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "hw/sysbus.h"
34 #include "hw/ppc/spapr.h"
35 #include "mmu-hash64.h"
36
39 #include "hw/ppc/spapr_vio.h"
40 #include "sysemu/watchdog.h"
41
42 //#define DEBUG_KVM
43
44 #ifdef DEBUG_KVM
45 #define dprintf(fmt, ...) \
46 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
47 #else
48 #define dprintf(fmt, ...) \
49 do { } while (0)
50 #endif
51
52 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
53
54 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
55 KVM_CAP_LAST_INFO
56 };
57
58 static int cap_interrupt_unset = false;
59 static int cap_interrupt_level = false;
60 static int cap_segstate;
61 static int cap_booke_sregs;
62 static int cap_ppc_smt;
63 static int cap_ppc_rma;
64 static int cap_spapr_tce;
65 static int cap_hior;
66 static int cap_one_reg;
67 static int cap_epr;
68 static int cap_ppc_watchdog;
69
70 /* XXX We have a race condition where we actually have a level-triggered
71  * interrupt, but the infrastructure can't expose that yet, so the guest
72  * takes the interrupt but ignores it, goes to sleep and never gets notified
73  * that there's still an interrupt pending.
74  *
75  * As a quick workaround, let's just wake up again 20 ms after we injected
76  * an interrupt. That way we can ensure that we always reinject interrupts
77  * in case the guest swallowed them.
78  */
79 static QEMUTimer *idle_timer;
80
81 static void kvm_kick_cpu(void *opaque)
82 {
83 PowerPCCPU *cpu = opaque;
84
85 qemu_cpu_kick(CPU(cpu));
86 }
87
88 static int kvm_ppc_register_host_cpu_type(void);
89
90 int kvm_arch_init(KVMState *s)
91 {
92 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
93 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
94 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
95 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
96 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
97 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
98 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
99 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
100 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
101 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
102 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
103
104 if (!cap_interrupt_level) {
105 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
106 "VM to stall at times!\n");
107 }
108
109 kvm_ppc_register_host_cpu_type();
110
111 return 0;
112 }
113
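/* Push the guest's PVR into KVM's sregs so the vcpu matches the modelled
 * CPU; BookE is skipped since KVM uses the native PVR there. */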
114 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
115 {
116 CPUPPCState *cenv = &cpu->env;
117 CPUState *cs = CPU(cpu);
118 struct kvm_sregs sregs;
119 int ret;
120
121 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
122 /* What we're really trying to say is "if we're on BookE, we use
123 the native PVR for now". This is the only sane way to check
124 it though, so we risk making users believe they can run
125 BookE guests on BookS. Let's hope nobody dares to try :) */
126 return 0;
127 } else {
128 if (!cap_segstate) {
129 fprintf(stderr, "kvm error: missing PVR setting capability\n");
130 return -ENOSYS;
131 }
132 }
133
134 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
135 if (ret) {
136 return ret;
137 }
138
139 sregs.pvr = cenv->spr[SPR_PVR];
140 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
141 }
142
143 /* Set up a shared TLB array with KVM */
144 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
145 {
146 CPUPPCState *env = &cpu->env;
147 CPUState *cs = CPU(cpu);
148 struct kvm_book3e_206_tlb_params params = {};
149 struct kvm_config_tlb cfg = {};
150 struct kvm_enable_cap encap = {};
151 unsigned int entries = 0;
152 int ret, i;
153
154 if (!kvm_enabled() ||
155 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
156 return 0;
157 }
158
159 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
160
161 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
162 params.tlb_sizes[i] = booke206_tlb_size(env, i);
163 params.tlb_ways[i] = booke206_tlb_ways(env, i);
164 entries += params.tlb_sizes[i];
165 }
166
167 assert(entries == env->nb_tlb);
168 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
169
170 env->tlb_dirty = true;
171
172 cfg.array = (uintptr_t)env->tlb.tlbm;
173 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
174 cfg.params = (uintptr_t)&params;
175 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
176
177 encap.cap = KVM_CAP_SW_TLB;
178 encap.args[0] = (uintptr_t)&cfg;
179
180 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
181 if (ret < 0) {
182 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
183 __func__, strerror(-ret));
184 return ret;
185 }
186
187 env->kvm_sw_tlb = true;
188 return 0;
189 }
190
191
192 #if defined(TARGET_PPC64)
193 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
194 struct kvm_ppc_smmu_info *info)
195 {
196 CPUPPCState *env = &cpu->env;
197 CPUState *cs = CPU(cpu);
198
199 memset(info, 0, sizeof(*info));
200
201 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
202 * we need to "guess" what the supported page sizes are.
203 *
204 * For that to work we make a few assumptions:
205 *
206 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
207 * KVM which only supports 4K and 16M pages, but supports them
208 * regardless of the backing store characteristics. We also don't
209 * support 1T segments.
210 *
211 * This is safe because if HV KVM ever supports that capability or PR
212 * KVM grows support for more page/segment sizes, those versions
213 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
214 * will not hit this fallback.
215 *
216 * - Else we are running HV KVM. This means we only support page
217 * sizes that fit in the backing store. Additionally we only
218 * advertise 64K pages if the processor is ARCH 2.06 and we assume
219 * P7 encodings for the SLB and hash table. Here too, we assume
220 * support for any newer processor will mean a kernel that
221 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
222 * this fallback.
223 */
224 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
225 /* No flags */
226 info->flags = 0;
227 info->slb_size = 64;
228
229 /* Standard 4k base page size segment */
230 info->sps[0].page_shift = 12;
231 info->sps[0].slb_enc = 0;
232 info->sps[0].enc[0].page_shift = 12;
233 info->sps[0].enc[0].pte_enc = 0;
234
235 /* Standard 16M large page size segment */
236 info->sps[1].page_shift = 24;
237 info->sps[1].slb_enc = SLB_VSID_L;
238 info->sps[1].enc[0].page_shift = 24;
239 info->sps[1].enc[0].pte_enc = 0;
240 } else {
241 int i = 0;
242
243 /* HV KVM has backing store size restrictions */
244 info->flags = KVM_PPC_PAGE_SIZES_REAL;
245
246 if (env->mmu_model & POWERPC_MMU_1TSEG) {
247 info->flags |= KVM_PPC_1T_SEGMENTS;
248 }
249
250 if (env->mmu_model == POWERPC_MMU_2_06) {
251 info->slb_size = 32;
252 } else {
253 info->slb_size = 64;
254 }
255
256 /* Standard 4k base page size segment */
257 info->sps[i].page_shift = 12;
258 info->sps[i].slb_enc = 0;
259 info->sps[i].enc[0].page_shift = 12;
260 info->sps[i].enc[0].pte_enc = 0;
261 i++;
262
263 /* 64K on MMU 2.06 */
264 if (env->mmu_model == POWERPC_MMU_2_06) {
265 info->sps[i].page_shift = 16;
266 info->sps[i].slb_enc = 0x110;
267 info->sps[i].enc[0].page_shift = 16;
268 info->sps[i].enc[0].pte_enc = 1;
269 i++;
270 }
271
272 /* Standard 16M large page size segment */
273 info->sps[i].page_shift = 24;
274 info->sps[i].slb_enc = SLB_VSID_L;
275 info->sps[i].enc[0].page_shift = 24;
276 info->sps[i].enc[0].pte_enc = 0;
277 }
278 }
279
280 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
281 {
282 CPUState *cs = CPU(cpu);
283 int ret;
284
285 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
286 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
287 if (ret == 0) {
288 return;
289 }
290 }
291
292 kvm_get_fallback_smmu_info(cpu, info);
293 }
294
295 static long getrampagesize(void)
296 {
297 struct statfs fs;
298 int ret;
299
300 if (!mem_path) {
301 /* guest RAM is backed by normal anonymous pages */
302 return getpagesize();
303 }
304
305 do {
306 ret = statfs(mem_path, &fs);
307 } while (ret != 0 && errno == EINTR);
308
309 if (ret != 0) {
310 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
311 strerror(errno));
312 exit(1);
313 }
314
315 #define HUGETLBFS_MAGIC 0x958458f6
316
317 if (fs.f_type != HUGETLBFS_MAGIC) {
318 /* Explicit mempath, but it's ordinary pages */
319 return getpagesize();
320 }
321
322 /* It's hugetlbfs; return the huge page size */
323 return fs.f_bsize;
324 }
325
326 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
327 {
328 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
329 return true;
330 }
331
332 return (1ul << shift) <= rampgsize;
333 }
334
335 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
336 {
337 static struct kvm_ppc_smmu_info smmu_info;
338 static bool has_smmu_info;
339 CPUPPCState *env = &cpu->env;
340 long rampagesize;
341 int iq, ik, jq, jk;
342
343 /* We only handle page sizes for 64-bit server guests for now */
344 if (!(env->mmu_model & POWERPC_MMU_64)) {
345 return;
346 }
347
348 /* Collect MMU info from kernel if not already */
349 if (!has_smmu_info) {
350 kvm_get_smmu_info(cpu, &smmu_info);
351 has_smmu_info = true;
352 }
353
354 rampagesize = getrampagesize();
355
356 /* Convert to QEMU form */
357 memset(&env->sps, 0, sizeof(env->sps));
358
359 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
360 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
361 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
362
363 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
364 ksps->page_shift)) {
365 continue;
366 }
367 qsps->page_shift = ksps->page_shift;
368 qsps->slb_enc = ksps->slb_enc;
369 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
370 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
371 ksps->enc[jk].page_shift)) {
372 continue;
373 }
374 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
375 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
376 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
377 break;
378 }
379 }
380 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
381 break;
382 }
383 }
384 env->slb_nr = smmu_info.slb_size;
385 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
386 env->mmu_model |= POWERPC_MMU_1TSEG;
387 } else {
388 env->mmu_model &= ~POWERPC_MMU_1TSEG;
389 }
390 }
391 #else /* defined (TARGET_PPC64) */
392
393 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
394 {
395 }
396
397 #endif /* !defined (TARGET_PPC64) */
398
399 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
400 {
401 return cpu->cpu_index;
402 }
403
404 int kvm_arch_init_vcpu(CPUState *cs)
405 {
406 PowerPCCPU *cpu = POWERPC_CPU(cs);
407 CPUPPCState *cenv = &cpu->env;
408 int ret;
409
410 /* Gather server mmu info from KVM and update the CPU state */
411 kvm_fixup_page_sizes(cpu);
412
413 /* Synchronize sregs with kvm */
414 ret = kvm_arch_sync_sregs(cpu);
415 if (ret) {
416 return ret;
417 }
418
419 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);
420
421 /* Some targets support access to KVM's guest TLB. */
422 switch (cenv->mmu_model) {
423 case POWERPC_MMU_BOOKE206:
424 ret = kvm_booke206_tlb_init(cpu);
425 break;
426 default:
427 break;
428 }
429
430 return ret;
431 }
432
433 void kvm_arch_reset_vcpu(CPUState *cpu)
434 {
435 }
436
437 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
438 {
439 CPUPPCState *env = &cpu->env;
440 CPUState *cs = CPU(cpu);
441 struct kvm_dirty_tlb dirty_tlb;
442 unsigned char *bitmap;
443 int ret;
444
445 if (!env->kvm_sw_tlb) {
446 return;
447 }
448
449 bitmap = g_malloc((env->nb_tlb + 7) / 8);
450 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
451
452 dirty_tlb.bitmap = (uintptr_t)bitmap;
453 dirty_tlb.num_dirty = env->nb_tlb;
454
455 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
456 if (ret) {
457 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
458 __func__, strerror(-ret));
459 }
460
461 g_free(bitmap);
462 }
463
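/* Fetch a single SPR from KVM via the ONE_REG interface into env->spr[]. */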
464 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
465 {
466 PowerPCCPU *cpu = POWERPC_CPU(cs);
467 CPUPPCState *env = &cpu->env;
468 union {
469 uint32_t u32;
470 uint64_t u64;
471 } val;
472 struct kvm_one_reg reg = {
473 .id = id,
474 .addr = (uintptr_t) &val,
475 };
476 int ret;
477
478 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
479 if (ret != 0) {
480 fprintf(stderr, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
481 spr, strerror(errno));
482 } else {
483 switch (id & KVM_REG_SIZE_MASK) {
484 case KVM_REG_SIZE_U32:
485 env->spr[spr] = val.u32;
486 break;
487
488 case KVM_REG_SIZE_U64:
489 env->spr[spr] = val.u64;
490 break;
491
492 default:
493 /* Don't handle this size yet */
494 abort();
495 }
496 }
497 }
498
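/* Write a single SPR from env->spr[] to KVM via the ONE_REG interface. */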
499 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
500 {
501 PowerPCCPU *cpu = POWERPC_CPU(cs);
502 CPUPPCState *env = &cpu->env;
503 union {
504 uint32_t u32;
505 uint64_t u64;
506 } val;
507 struct kvm_one_reg reg = {
508 .id = id,
509 .addr = (uintptr_t) &val,
510 };
511 int ret;
512
513 switch (id & KVM_REG_SIZE_MASK) {
514 case KVM_REG_SIZE_U32:
515 val.u32 = env->spr[spr];
516 break;
517
518 case KVM_REG_SIZE_U64:
519 val.u64 = env->spr[spr];
520 break;
521
522 default:
523 /* Don't handle this size yet */
524 abort();
525 }
526
527 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
528 if (ret != 0) {
529 fprintf(stderr, "Warning: Unable to set SPR %d to KVM: %s\n",
530 spr, strerror(errno));
531 }
532 }
533
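/* Upload the FP/VSX and Altivec register state to KVM using ONE_REG. */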
534 static int kvm_put_fp(CPUState *cs)
535 {
536 PowerPCCPU *cpu = POWERPC_CPU(cs);
537 CPUPPCState *env = &cpu->env;
538 struct kvm_one_reg reg;
539 int i;
540 int ret;
541
542 if (env->insns_flags & PPC_FLOAT) {
543 uint64_t fpscr = env->fpscr;
544 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
545
546 reg.id = KVM_REG_PPC_FPSCR;
547 reg.addr = (uintptr_t)&fpscr;
548 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
549 if (ret < 0) {
550 dprintf("Unable to set FPSCR to KVM: %s\n", strerror(errno));
551 return ret;
552 }
553
554 for (i = 0; i < 32; i++) {
555 uint64_t vsr[2];
556
557 vsr[0] = float64_val(env->fpr[i]);
558 vsr[1] = env->vsr[i];
559 reg.addr = (uintptr_t) &vsr;
560 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
561
562 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
563 if (ret < 0) {
564 dprintf("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
565 i, strerror(errno));
566 return ret;
567 }
568 }
569 }
570
571 if (env->insns_flags & PPC_ALTIVEC) {
572 reg.id = KVM_REG_PPC_VSCR;
573 reg.addr = (uintptr_t)&env->vscr;
574 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
575 if (ret < 0) {
576 dprintf("Unable to set VSCR to KVM: %s\n", strerror(errno));
577 return ret;
578 }
579
580 for (i = 0; i < 32; i++) {
581 reg.id = KVM_REG_PPC_VR(i);
582 reg.addr = (uintptr_t)&env->avr[i];
583 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
584 if (ret < 0) {
585 dprintf("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
586 return ret;
587 }
588 }
589 }
590
591 return 0;
592 }
593
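/* Fetch the FP/VSX and Altivec register state from KVM using ONE_REG. */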
594 static int kvm_get_fp(CPUState *cs)
595 {
596 PowerPCCPU *cpu = POWERPC_CPU(cs);
597 CPUPPCState *env = &cpu->env;
598 struct kvm_one_reg reg;
599 int i;
600 int ret;
601
602 if (env->insns_flags & PPC_FLOAT) {
603 uint64_t fpscr;
604 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
605
606 reg.id = KVM_REG_PPC_FPSCR;
607 reg.addr = (uintptr_t)&fpscr;
608 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
609 if (ret < 0) {
610 dprintf("Unable to get FPSCR from KVM: %s\n", strerror(errno));
611 return ret;
612 } else {
613 env->fpscr = fpscr;
614 }
615
616 for (i = 0; i < 32; i++) {
617 uint64_t vsr[2];
618
619 reg.addr = (uintptr_t) &vsr;
620 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
621
622 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
623 if (ret < 0) {
624 dprintf("Unable to get %s%d from KVM: %s\n",
625 vsx ? "VSR" : "FPR", i, strerror(errno));
626 return ret;
627 } else {
628 env->fpr[i] = vsr[0];
629 if (vsx) {
630 env->vsr[i] = vsr[1];
631 }
632 }
633 }
634 }
635
636 if (env->insns_flags & PPC_ALTIVEC) {
637 reg.id = KVM_REG_PPC_VSCR;
638 reg.addr = (uintptr_t)&env->vscr;
639 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
640 if (ret < 0) {
641 dprintf("Unable to get VSCR from KVM: %s\n", strerror(errno));
642 return ret;
643 }
644
645 for (i = 0; i < 32; i++) {
646 reg.id = KVM_REG_PPC_VR(i);
647 reg.addr = (uintptr_t)&env->avr[i];
648 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
649 if (ret < 0) {
650 dprintf("Unable to get VR%d from KVM: %s\n",
651 i, strerror(errno));
652 return ret;
653 }
654 }
655 }
656
657 return 0;
658 }
659
660 int kvm_arch_put_registers(CPUState *cs, int level)
661 {
662 PowerPCCPU *cpu = POWERPC_CPU(cs);
663 CPUPPCState *env = &cpu->env;
664 struct kvm_regs regs;
665 int ret;
666 int i;
667
668 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
669 if (ret < 0) {
670 return ret;
671 }
672
673 regs.ctr = env->ctr;
674 regs.lr = env->lr;
675 regs.xer = cpu_read_xer(env);
676 regs.msr = env->msr;
677 regs.pc = env->nip;
678
679 regs.srr0 = env->spr[SPR_SRR0];
680 regs.srr1 = env->spr[SPR_SRR1];
681
682 regs.sprg0 = env->spr[SPR_SPRG0];
683 regs.sprg1 = env->spr[SPR_SPRG1];
684 regs.sprg2 = env->spr[SPR_SPRG2];
685 regs.sprg3 = env->spr[SPR_SPRG3];
686 regs.sprg4 = env->spr[SPR_SPRG4];
687 regs.sprg5 = env->spr[SPR_SPRG5];
688 regs.sprg6 = env->spr[SPR_SPRG6];
689 regs.sprg7 = env->spr[SPR_SPRG7];
690
691 regs.pid = env->spr[SPR_BOOKE_PID];
692
693 for (i = 0; i < 32; i++)
694 regs.gpr[i] = env->gpr[i];
695
696 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
697 if (ret < 0)
698 return ret;
699
700 kvm_put_fp(cs);
701
702 if (env->tlb_dirty) {
703 kvm_sw_tlb_put(cpu);
704 env->tlb_dirty = false;
705 }
706
707 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
708 struct kvm_sregs sregs;
709
710 sregs.pvr = env->spr[SPR_PVR];
711
712 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
713
714 /* Sync SLB */
715 #ifdef TARGET_PPC64
716 for (i = 0; i < 64; i++) {
717 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
718 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
719 }
720 #endif
721
722 /* Sync SRs */
723 for (i = 0; i < 16; i++) {
724 sregs.u.s.ppc32.sr[i] = env->sr[i];
725 }
726
727 /* Sync BATs */
728 for (i = 0; i < 8; i++) {
729 /* Beware. We have to swap upper and lower bits here */
730 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
731 | env->DBAT[1][i];
732 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
733 | env->IBAT[1][i];
734 }
735
736 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
737 if (ret) {
738 return ret;
739 }
740 }
741
742 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
743 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
744 }
745
746 if (cap_one_reg) {
747 int i;
748
749 /* We deliberately ignore errors here: for kernels which have
750 * the ONE_REG calls but don't support the specific
751 * registers, there's a reasonable chance things will still
752 * work, at least until we try to migrate. */
753 for (i = 0; i < 1024; i++) {
754 uint64_t id = env->spr_cb[i].one_reg_id;
755
756 if (id != 0) {
757 kvm_put_one_spr(cs, id, i);
758 }
759 }
760 }
761
762 return ret;
763 }
764
765 int kvm_arch_get_registers(CPUState *cs)
766 {
767 PowerPCCPU *cpu = POWERPC_CPU(cs);
768 CPUPPCState *env = &cpu->env;
769 struct kvm_regs regs;
770 struct kvm_sregs sregs;
771 uint32_t cr;
772 int i, ret;
773
774 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
775 if (ret < 0)
776 return ret;
777
778 cr = regs.cr;
779 for (i = 7; i >= 0; i--) {
780 env->crf[i] = cr & 15;
781 cr >>= 4;
782 }
783
784 env->ctr = regs.ctr;
785 env->lr = regs.lr;
786 cpu_write_xer(env, regs.xer);
787 env->msr = regs.msr;
788 env->nip = regs.pc;
789
790 env->spr[SPR_SRR0] = regs.srr0;
791 env->spr[SPR_SRR1] = regs.srr1;
792
793 env->spr[SPR_SPRG0] = regs.sprg0;
794 env->spr[SPR_SPRG1] = regs.sprg1;
795 env->spr[SPR_SPRG2] = regs.sprg2;
796 env->spr[SPR_SPRG3] = regs.sprg3;
797 env->spr[SPR_SPRG4] = regs.sprg4;
798 env->spr[SPR_SPRG5] = regs.sprg5;
799 env->spr[SPR_SPRG6] = regs.sprg6;
800 env->spr[SPR_SPRG7] = regs.sprg7;
801
802 env->spr[SPR_BOOKE_PID] = regs.pid;
803
804 for (i = 0; i < 32; i++)
805 env->gpr[i] = regs.gpr[i];
806
807 kvm_get_fp(cs);
808
809 if (cap_booke_sregs) {
810 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
811 if (ret < 0) {
812 return ret;
813 }
814
815 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
816 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
817 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
818 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
819 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
820 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
821 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
822 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
823 env->spr[SPR_DECR] = sregs.u.e.dec;
824 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
825 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
826 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
827 }
828
829 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
830 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
831 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
832 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
833 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
834 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
835 }
836
837 if (sregs.u.e.features & KVM_SREGS_E_64) {
838 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
839 }
840
841 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
842 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
843 }
844
845 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
846 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
847 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
848 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
849 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
850 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
851 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
852 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
853 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
854 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
855 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
856 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
857 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
858 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
859 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
860 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
861 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
862
863 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
864 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
865 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
866 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
867 }
868
869 if (sregs.u.e.features & KVM_SREGS_E_PM) {
870 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
871 }
872
873 if (sregs.u.e.features & KVM_SREGS_E_PC) {
874 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
875 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
876 }
877 }
878
879 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
880 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
881 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
882 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
883 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
884 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
885 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
886 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
887 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
888 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
889 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
890 }
891
892 if (sregs.u.e.features & KVM_SREGS_EXP) {
893 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
894 }
895
896 if (sregs.u.e.features & KVM_SREGS_E_PD) {
897 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
898 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
899 }
900
901 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
902 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
903 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
904 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
905
906 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
907 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
908 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
909 }
910 }
911 }
912
913 if (cap_segstate) {
914 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
915 if (ret < 0) {
916 return ret;
917 }
918
919 ppc_store_sdr1(env, sregs.u.s.sdr1);
920
921 /* Sync SLB */
922 #ifdef TARGET_PPC64
923 for (i = 0; i < 64; i++) {
924 ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
925 sregs.u.s.ppc64.slb[i].slbv);
926 }
927 #endif
928
929 /* Sync SRs */
930 for (i = 0; i < 16; i++) {
931 env->sr[i] = sregs.u.s.ppc32.sr[i];
932 }
933
934 /* Sync BATs */
935 for (i = 0; i < 8; i++) {
936 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
937 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
938 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
939 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
940 }
941 }
942
943 if (cap_hior) {
944 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
945 }
946
947 if (cap_one_reg) {
948 int i;
949
950 /* We deliberately ignore errors here: for kernels which have
951 * the ONE_REG calls but don't support the specific
952 * registers, there's a reasonable chance things will still
953 * work, at least until we try to migrate. */
954 for (i = 0; i < 1024; i++) {
955 uint64_t id = env->spr_cb[i].one_reg_id;
956
957 if (id != 0) {
958 kvm_get_one_spr(cs, id, i);
959 }
960 }
961 }
962
963 return 0;
964 }
965
966 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
967 {
968 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
969
970 if (irq != PPC_INTERRUPT_EXT) {
971 return 0;
972 }
973
974 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
975 return 0;
976 }
977
978 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
979
980 return 0;
981 }
982
983 #if defined(TARGET_PPCEMB)
984 #define PPC_INPUT_INT PPC40x_INPUT_INT
985 #elif defined(TARGET_PPC64)
986 #define PPC_INPUT_INT PPC970_INPUT_INT
987 #else
988 #define PPC_INPUT_INT PPC6xx_INPUT_INT
989 #endif
990
991 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
992 {
993 PowerPCCPU *cpu = POWERPC_CPU(cs);
994 CPUPPCState *env = &cpu->env;
995 int r;
996 unsigned irq;
997
998 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
999 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1000 if (!cap_interrupt_level &&
1001 run->ready_for_interrupt_injection &&
1002 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1003 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1004 {
1005 /* For now KVM disregards the 'irq' argument. However, in the
1006 * future KVM could cache it in-kernel to avoid a heavyweight exit
1007 * when reading the UIC.
1008 */
1009 irq = KVM_INTERRUPT_SET;
1010
1011 dprintf("injected interrupt %d\n", irq);
1012 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1013 if (r < 0) {
1014 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1015 }
1016
1017 /* Always wake up soon in case the interrupt was level based */
1018 qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
1019 (get_ticks_per_sec() / 50));
1020 }
1021
1022 /* We don't know if there are more interrupts pending after this. However,
1023 * the guest will return to userspace in the course of handling this one
1024 * anyway, so we will get a chance to deliver the rest. */
1025 }
1026
1027 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1028 {
1029 }
1030
1031 int kvm_arch_process_async_events(CPUState *cs)
1032 {
1033 return cs->halted;
1034 }
1035
1036 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1037 {
1038 CPUState *cs = CPU(cpu);
1039 CPUPPCState *env = &cpu->env;
1040
1041 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1042 cs->halted = 1;
1043 env->exception_index = EXCP_HLT;
1044 }
1045
1046 return 0;
1047 }
1048
1049 /* map dcr access to existing qemu dcr emulation */
1050 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1051 {
1052 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1053 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1054
1055 return 0;
1056 }
1057
1058 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1059 {
1060 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1061 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1062
1063 return 0;
1064 }
1065
1066 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1067 {
1068 PowerPCCPU *cpu = POWERPC_CPU(cs);
1069 CPUPPCState *env = &cpu->env;
1070 int ret;
1071
1072 switch (run->exit_reason) {
1073 case KVM_EXIT_DCR:
1074 if (run->dcr.is_write) {
1075 dprintf("handle dcr write\n");
1076 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1077 } else {
1078 dprintf("handle dcr read\n");
1079 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1080 }
1081 break;
1082 case KVM_EXIT_HLT:
1083 dprintf("handle halt\n");
1084 ret = kvmppc_handle_halt(cpu);
1085 break;
1086 #if defined(TARGET_PPC64)
1087 case KVM_EXIT_PAPR_HCALL:
1088 dprintf("handle PAPR hypercall\n");
1089 run->papr_hcall.ret = spapr_hypercall(cpu,
1090 run->papr_hcall.nr,
1091 run->papr_hcall.args);
1092 ret = 0;
1093 break;
1094 #endif
1095 case KVM_EXIT_EPR:
1096 dprintf("handle epr\n");
1097 run->epr.epr = ldl_phys(env->mpic_iack);
1098 ret = 0;
1099 break;
1100 case KVM_EXIT_WATCHDOG:
1101 dprintf("handle watchdog expiry\n");
1102 watchdog_perform_action();
1103 ret = 0;
1104 break;
1105
1106 default:
1107 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1108 ret = -1;
1109 break;
1110 }
1111
1112 return ret;
1113 }
1114
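/* Watchdog plumbing: the next three helpers set/clear TSR bits and program
 * the TCR in-kernel via the ONE_REG interface. */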
1115 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1116 {
1117 CPUState *cs = CPU(cpu);
1118 uint32_t bits = tsr_bits;
1119 struct kvm_one_reg reg = {
1120 .id = KVM_REG_PPC_OR_TSR,
1121 .addr = (uintptr_t) &bits,
1122 };
1123
1124 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1125 }
1126
1127 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1128 {
1130 CPUState *cs = CPU(cpu);
1131 uint32_t bits = tsr_bits;
1132 struct kvm_one_reg reg = {
1133 .id = KVM_REG_PPC_CLEAR_TSR,
1134 .addr = (uintptr_t) &bits,
1135 };
1136
1137 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1138 }
1139
1140 int kvmppc_set_tcr(PowerPCCPU *cpu)
1141 {
1142 CPUState *cs = CPU(cpu);
1143 CPUPPCState *env = &cpu->env;
1144 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1145
1146 struct kvm_one_reg reg = {
1147 .id = KVM_REG_PPC_TCR,
1148 .addr = (uintptr_t) &tcr,
1149 };
1150
1151 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1152 }
1153
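/* Enable the in-kernel BookE watchdog emulation for this vcpu, provided the
 * host advertises KVM_CAP_PPC_BOOKE_WATCHDOG. */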
1154 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1155 {
1156 CPUState *cs = CPU(cpu);
1157 struct kvm_enable_cap encap = {};
1158 int ret;
1159
1160 if (!kvm_enabled()) {
1161 return -1;
1162 }
1163
1164 if (!cap_ppc_watchdog) {
1165 fprintf(stderr, "warning: KVM does not support watchdog\n");
1166 return -1;
1167 }
1168
1169 encap.cap = KVM_CAP_PPC_BOOKE_WATCHDOG;
1170 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
1171 if (ret < 0) {
1172 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1173 __func__, strerror(-ret));
1174 return ret;
1175 }
1176
1177 return ret;
1178 }
1179
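/* Copy the first /proc/cpuinfo line starting with 'field' into 'value';
 * returns 0 on success, -1 otherwise. */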
1180 static int read_cpuinfo(const char *field, char *value, int len)
1181 {
1182 FILE *f;
1183 int ret = -1;
1184 int field_len = strlen(field);
1185 char line[512];
1186
1187 f = fopen("/proc/cpuinfo", "r");
1188 if (!f) {
1189 return -1;
1190 }
1191
1192 do {
1193 if (!fgets(line, sizeof(line), f)) {
1194 break;
1195 }
1196 if (!strncmp(line, field, field_len)) {
1197 pstrcpy(value, len, line);
1198 ret = 0;
1199 break;
1200 }
1201 } while (*line);
1202
1203 fclose(f);
1204
1205 return ret;
1206 }
1207
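/* Parse the host timebase frequency from /proc/cpuinfo, falling back to
 * get_ticks_per_sec() if it can't be found. */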
1208 uint32_t kvmppc_get_tbfreq(void)
1209 {
1210 char line[512];
1211 char *ns;
1212 uint32_t retval = get_ticks_per_sec();
1213
1214 if (read_cpuinfo("timebase", line, sizeof(line))) {
1215 return retval;
1216 }
1217
1218 if (!(ns = strchr(line, ':'))) {
1219 return retval;
1220 }
1221
1222 ns++;
1223
1224 retval = atoi(ns);
1225 return retval;
1226 }
1227
1228 /* Try to find a device tree node for a CPU with clock-frequency property */
1229 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1230 {
1231 struct dirent *dirp;
1232 DIR *dp;
1233
1234 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1235 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1236 return -1;
1237 }
1238
1239 buf[0] = '\0';
1240 while ((dirp = readdir(dp)) != NULL) {
1241 FILE *f;
1242 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1243 dirp->d_name);
1244 f = fopen(buf, "r");
1245 if (f) {
1246 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1247 fclose(f);
1248 break;
1249 }
1250 buf[0] = '\0';
1251 }
1252 closedir(dp);
1253 if (buf[0] == '\0') {
1254 printf("Unknown host!\n");
1255 return -1;
1256 }
1257
1258 return 0;
1259 }
1260
1261 /* Read a CPU node property from the host device tree that's a single
1262 * integer (32-bit or 64-bit). Returns -1 if the property can't be
1263 * found or opened, and 0 if the value isn't in a format we
1264 * understand. */
1265 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1266 {
1267 char buf[PATH_MAX];
1268 union {
1269 uint32_t v32;
1270 uint64_t v64;
1271 } u;
1272 FILE *f;
1273 int len;
1274
1275 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1276 return -1;
1277 }
1278
1279 strncat(buf, "/", sizeof(buf) - strlen(buf) - 1);
1280 strncat(buf, propname, sizeof(buf) - strlen(buf) - 1);
1281
1282 f = fopen(buf, "rb");
1283 if (!f) {
1284 return -1;
1285 }
1286
1287 len = fread(&u, 1, sizeof(u), f);
1288 fclose(f);
1289 switch (len) {
1290 case 4:
1291 /* property is a 32-bit quantity */
1292 return be32_to_cpu(u.v32);
1293 case 8:
1294 return be64_to_cpu(u.v64);
1295 }
1296
1297 return 0;
1298 }
1299
1300 uint64_t kvmppc_get_clockfreq(void)
1301 {
1302 return kvmppc_read_int_cpu_dt("clock-frequency");
1303 }
1304
1305 uint32_t kvmppc_get_vmx(void)
1306 {
1307 return kvmppc_read_int_cpu_dt("ibm,vmx");
1308 }
1309
1310 uint32_t kvmppc_get_dfp(void)
1311 {
1312 return kvmppc_read_int_cpu_dt("ibm,dfp");
1313 }
1314
1315 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1316 {
1317 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1318 CPUState *cs = CPU(cpu);
1319
1320 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1321 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1322 return 0;
1323 }
1324
1325 return 1;
1326 }
1327
1328 int kvmppc_get_hasidle(CPUPPCState *env)
1329 {
1330 struct kvm_ppc_pvinfo pvinfo;
1331
1332 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1333 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1334 return 1;
1335 }
1336
1337 return 0;
1338 }
1339
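/* Copy the host-recommended hypercall instructions into buf, or install a
 * stub sequence that simply fails (li r3,-1). */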
1340 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1341 {
1342 uint32_t *hc = (uint32_t*)buf;
1343 struct kvm_ppc_pvinfo pvinfo;
1344
1345 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1346 memcpy(buf, pvinfo.hcall, buf_len);
1347 return 0;
1348 }
1349
1350 /*
1351 * Fallback to always fail hypercalls:
1352 *
1353 * li r3, -1
1354 * nop
1355 * nop
1356 * nop
1357 */
1358
1359 hc[0] = 0x3860ffff;
1360 hc[1] = 0x60000000;
1361 hc[2] = 0x60000000;
1362 hc[3] = 0x60000000;
1363
1364 return 0;
1365 }
1366
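/* Switch the vcpu into PAPR (sPAPR guest) mode; aborts if this KVM lacks
 * KVM_CAP_PPC_PAPR. */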
1367 void kvmppc_set_papr(PowerPCCPU *cpu)
1368 {
1369 CPUPPCState *env = &cpu->env;
1370 CPUState *cs = CPU(cpu);
1371 struct kvm_enable_cap cap = {};
1372 int ret;
1373
1374 cap.cap = KVM_CAP_PPC_PAPR;
1375 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1376
1377 if (ret) {
1378 cpu_abort(env, "This KVM version does not support PAPR\n");
1379 }
1380 }
1381
1382 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1383 {
1384 CPUPPCState *env = &cpu->env;
1385 CPUState *cs = CPU(cpu);
1386 struct kvm_enable_cap cap = {};
1387 int ret;
1388
1389 cap.cap = KVM_CAP_PPC_EPR;
1390 cap.args[0] = mpic_proxy;
1391 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1392
1393 if (ret && mpic_proxy) {
1394 cpu_abort(env, "This KVM version does not support EPR\n");
1395 }
1396 }
1397
1398 int kvmppc_smt_threads(void)
1399 {
1400 return cap_ppc_smt ? cap_ppc_smt : 1;
1401 }
1402
1403 #ifdef TARGET_PPC64
1404 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1405 {
1406 void *rma;
1407 off_t size;
1408 int fd;
1409 struct kvm_allocate_rma ret;
1410 MemoryRegion *rma_region;
1411
1412 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1413 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1414 * not necessary on this hardware
1415 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1416 *
1417 * FIXME: We should allow the user to force contiguous RMA
1418 * allocation in the cap_ppc_rma==1 case.
1419 */
1420 if (cap_ppc_rma < 2) {
1421 return 0;
1422 }
1423
1424 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1425 if (fd < 0) {
1426 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1427 strerror(errno));
1428 return -1;
1429 }
1430
1431 size = MIN(ret.rma_size, 256ul << 20);
1432
1433 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1434 if (rma == MAP_FAILED) {
1435 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1436 return -1;
1437 };
1438
1439 rma_region = g_new(MemoryRegion, 1);
1440 memory_region_init_ram_ptr(rma_region, name, size, rma);
1441 vmstate_register_ram_global(rma_region);
1442 memory_region_add_subregion(sysmem, 0, rma_region);
1443
1444 return size;
1445 }
1446
1447 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1448 {
1449 if (cap_ppc_rma >= 2) {
1450 return current_size;
1451 }
1452 return MIN(current_size,
1453 getrampagesize() << (hash_shift - 7));
1454 }
1455 #endif
1456
1457 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1458 {
1459 struct kvm_create_spapr_tce args = {
1460 .liobn = liobn,
1461 .window_size = window_size,
1462 };
1463 long len;
1464 int fd;
1465 void *table;
1466
1467 /* Must set fd to -1 so we don't try to munmap when called for
1468 * destroying the table, which the upper layers -will- do
1469 */
1470 *pfd = -1;
1471 if (!cap_spapr_tce) {
1472 return NULL;
1473 }
1474
1475 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1476 if (fd < 0) {
1477 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1478 liobn);
1479 return NULL;
1480 }
1481
1482 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1483 /* FIXME: round this up to page size */
1484
1485 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1486 if (table == MAP_FAILED) {
1487 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1488 liobn);
1489 close(fd);
1490 return NULL;
1491 }
1492
1493 *pfd = fd;
1494 return table;
1495 }
1496
1497 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1498 {
1499 long len;
1500
1501 if (fd < 0) {
1502 return -1;
1503 }
1504
1505 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
1506 if ((munmap(table, len) < 0) ||
1507 (close(fd) < 0)) {
1508 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1509 strerror(errno));
1510 /* Leak the table */
1511 }
1512
1513 return 0;
1514 }
1515
1516 int kvmppc_reset_htab(int shift_hint)
1517 {
1518 uint32_t shift = shift_hint;
1519
1520 if (!kvm_enabled()) {
1521 /* Full emulation, tell caller to allocate htab itself */
1522 return 0;
1523 }
1524 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1525 int ret;
1526 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1527 if (ret == -ENOTTY) {
1528 /* At least some versions of PR KVM advertise the
1529 * capability, but don't implement the ioctl(). Oops.
1530 * Return 0 so that we allocate the htab in qemu, as is
1531 * correct for PR. */
1532 return 0;
1533 } else if (ret < 0) {
1534 return ret;
1535 }
1536 return shift;
1537 }
1538
1539 /* We have a kernel that predates the htab reset calls. For PR
1540 * KVM, we need to allocate the htab ourselves; an HV KVM of
1541 * this era has already allocated a 16MB fixed-size hash
1542 * table. Kernels of this era have the GET_PVINFO capability
1543 * only on PR, so we use this hack to determine the right
1544 * answer. */
1545 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1546 /* PR - tell caller to allocate htab */
1547 return 0;
1548 } else {
1549 /* HV - assume 16MB kernel allocated htab */
1550 return 24;
1551 }
1552 }
1553
1554 static inline uint32_t mfpvr(void)
1555 {
1556 uint32_t pvr;
1557
1558 asm ("mfpvr %0"
1559 : "=r"(pvr));
1560 return pvr;
1561 }
1562
1563 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
1564 {
1565 if (on) {
1566 *word |= flags;
1567 } else {
1568 *word &= ~flags;
1569 }
1570 }
1571
1572 static void kvmppc_host_cpu_initfn(Object *obj)
1573 {
1574 assert(kvm_enabled());
1575 }
1576
1577 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1578 {
1579 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1580 uint32_t vmx = kvmppc_get_vmx();
1581 uint32_t dfp = kvmppc_get_dfp();
1582
1583 /* Now fix up the class with information we can query from the host */
1584
1585 if (vmx != -1) {
1586 /* Only override when we know what the host supports */
1587 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1588 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1589 }
1590 if (dfp != -1) {
1591 /* Only override when we know what the host supports */
1592 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1593 }
1594 }
1595
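/* Remap cpu_index so that guest threads are spaced by the host's SMT width
 * rather than by smp_threads. */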
1596 int kvmppc_fixup_cpu(PowerPCCPU *cpu)
1597 {
1598 CPUState *cs = CPU(cpu);
1599 int smt;
1600
1601 /* Adjust cpu index for SMT */
1602 smt = kvmppc_smt_threads();
1603 cs->cpu_index = (cs->cpu_index / smp_threads) * smt
1604 + (cs->cpu_index % smp_threads);
1605
1606 return 0;
1607 }
1608
1609 bool kvmppc_has_cap_epr(void)
1610 {
1611 return cap_epr;
1612 }
1613
1614 static int kvm_ppc_register_host_cpu_type(void)
1615 {
1616 TypeInfo type_info = {
1617 .name = TYPE_HOST_POWERPC_CPU,
1618 .instance_init = kvmppc_host_cpu_initfn,
1619 .class_init = kvmppc_host_cpu_class_init,
1620 };
1621 uint32_t host_pvr = mfpvr();
1622 PowerPCCPUClass *pvr_pcc;
1623
1624 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1625 if (pvr_pcc == NULL) {
1626 return -1;
1627 }
1628 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1629 type_register(&type_info);
1630 return 0;
1631 }
1632
1633
1634 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1635 {
1636 return true;
1637 }
1638
1639 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1640 {
1641 return 1;
1642 }
1643
1644 int kvm_arch_on_sigbus(int code, void *addr)
1645 {
1646 return 1;
1647 }