1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
22
23 #include <linux/kvm.h>
24
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
34
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "sysemu/watchdog.h"
39
40 //#define DEBUG_KVM
41
42 #ifdef DEBUG_KVM
43 #define dprintf(fmt, ...) \
44 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
45 #else
46 #define dprintf(fmt, ...) \
47 do { } while (0)
48 #endif
49
50 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
51
52 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
53 KVM_CAP_LAST_INFO
54 };
55
56 static int cap_interrupt_unset = false;
57 static int cap_interrupt_level = false;
58 static int cap_segstate;
59 static int cap_booke_sregs;
60 static int cap_ppc_smt;
61 static int cap_ppc_rma;
62 static int cap_spapr_tce;
63 static int cap_hior;
64 static int cap_one_reg;
65 static int cap_epr;
66 static int cap_ppc_watchdog;
67 static int cap_papr;
68
69 /* XXX We have a race condition where we actually have a level triggered
70 * interrupt, but the infrastructure can't expose that yet, so the guest
71 * takes but ignores it, goes to sleep and never gets notified that there's
72 * still an interrupt pending.
73 *
74 * As a quick workaround, let's just wake up again 20 ms after we injected
75  * an interrupt. That way we can ensure that we're always reinjecting
76 * interrupts in case the guest swallowed them.
77 */
78 static QEMUTimer *idle_timer;
79
80 static void kvm_kick_cpu(void *opaque)
81 {
82 PowerPCCPU *cpu = opaque;
83
84 qemu_cpu_kick(CPU(cpu));
85 }
86
87 static int kvm_ppc_register_host_cpu_type(void);
88
89 int kvm_arch_init(KVMState *s)
90 {
91 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
92 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
93 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
94 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
95 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
96 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
97 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
98 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
99 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
100 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
101 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
102 /* Note: we don't set cap_papr here, because this capability is
103  * only activated later, by kvmppc_set_papr() */
104
105 if (!cap_interrupt_level) {
106 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
107 "VM to stall at times!\n");
108 }
109
110 kvm_ppc_register_host_cpu_type();
111
112 return 0;
113 }
114
115 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
116 {
117 CPUPPCState *cenv = &cpu->env;
118 CPUState *cs = CPU(cpu);
119 struct kvm_sregs sregs;
120 int ret;
121
122 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
123 /* What we're really trying to say is "if we're on BookE, we use
124 the native PVR for now". This is the only sane way to check
125            it though, so we may mislead users into thinking they can run
126            BookE guests on BookS. Let's hope nobody dares enough :) */
127 return 0;
128 } else {
129 if (!cap_segstate) {
130 fprintf(stderr, "kvm error: missing PVR setting capability\n");
131 return -ENOSYS;
132 }
133 }
134
135 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
136 if (ret) {
137 return ret;
138 }
139
140 sregs.pvr = cenv->spr[SPR_PVR];
141 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
142 }
143
144 /* Set up a shared TLB array with KVM */
145 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
146 {
147 CPUPPCState *env = &cpu->env;
148 CPUState *cs = CPU(cpu);
149 struct kvm_book3e_206_tlb_params params = {};
150 struct kvm_config_tlb cfg = {};
151 struct kvm_enable_cap encap = {};
152 unsigned int entries = 0;
153 int ret, i;
154
155 if (!kvm_enabled() ||
156 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
157 return 0;
158 }
159
160 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
161
162 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
163 params.tlb_sizes[i] = booke206_tlb_size(env, i);
164 params.tlb_ways[i] = booke206_tlb_ways(env, i);
165 entries += params.tlb_sizes[i];
166 }
167
168 assert(entries == env->nb_tlb);
169 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
170
171 env->tlb_dirty = true;
172
173 cfg.array = (uintptr_t)env->tlb.tlbm;
174 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
175 cfg.params = (uintptr_t)&params;
176 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
177
178 encap.cap = KVM_CAP_SW_TLB;
179 encap.args[0] = (uintptr_t)&cfg;
180
181 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
182 if (ret < 0) {
183 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
184 __func__, strerror(-ret));
185 return ret;
186 }
187
188 env->kvm_sw_tlb = true;
189 return 0;
190 }
191
192
193 #if defined(TARGET_PPC64)
194 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
195 struct kvm_ppc_smmu_info *info)
196 {
197 CPUPPCState *env = &cpu->env;
198 CPUState *cs = CPU(cpu);
199
200 memset(info, 0, sizeof(*info));
201
202      * We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
203 * need to "guess" what the supported page sizes are.
204 *
205 * For that to work we make a few assumptions:
206 *
207 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
208 * KVM which only supports 4K and 16M pages, but supports them
209      *   regardless of the backing store characteristics. We also don't
210      *   support 1T segments.
211      *
212      *   This is safe because if HV KVM ever supports that capability or PR
213      *   KVM grows support for more page/segment sizes, those versions
214      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
215      *   will not hit this fallback.
216 *
217 * - Else we are running HV KVM. This means we only support page
218 * sizes that fit in the backing store. Additionally we only
219      *   advertise 64K pages if the processor is ARCH 2.06 and we assume
220 * P7 encodings for the SLB and hash table. Here too, we assume
221 * support for any newer processor will mean a kernel that
222 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
223 * this fallback.
224 */
225 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
226 /* No flags */
227 info->flags = 0;
228 info->slb_size = 64;
229
230 /* Standard 4k base page size segment */
231 info->sps[0].page_shift = 12;
232 info->sps[0].slb_enc = 0;
233 info->sps[0].enc[0].page_shift = 12;
234 info->sps[0].enc[0].pte_enc = 0;
235
236 /* Standard 16M large page size segment */
237 info->sps[1].page_shift = 24;
238 info->sps[1].slb_enc = SLB_VSID_L;
239 info->sps[1].enc[0].page_shift = 24;
240 info->sps[1].enc[0].pte_enc = 0;
241 } else {
242 int i = 0;
243
244 /* HV KVM has backing store size restrictions */
245 info->flags = KVM_PPC_PAGE_SIZES_REAL;
246
247 if (env->mmu_model & POWERPC_MMU_1TSEG) {
248 info->flags |= KVM_PPC_1T_SEGMENTS;
249 }
250
251 if (env->mmu_model == POWERPC_MMU_2_06) {
252 info->slb_size = 32;
253 } else {
254 info->slb_size = 64;
255 }
256
257 /* Standard 4k base page size segment */
258 info->sps[i].page_shift = 12;
259 info->sps[i].slb_enc = 0;
260 info->sps[i].enc[0].page_shift = 12;
261 info->sps[i].enc[0].pte_enc = 0;
262 i++;
263
264 /* 64K on MMU 2.06 */
265 if (env->mmu_model == POWERPC_MMU_2_06) {
266 info->sps[i].page_shift = 16;
267 info->sps[i].slb_enc = 0x110;
268 info->sps[i].enc[0].page_shift = 16;
269 info->sps[i].enc[0].pte_enc = 1;
270 i++;
271 }
272
273 /* Standard 16M large page size segment */
274 info->sps[i].page_shift = 24;
275 info->sps[i].slb_enc = SLB_VSID_L;
276 info->sps[i].enc[0].page_shift = 24;
277 info->sps[i].enc[0].pte_enc = 0;
278 }
279 }
280
281 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
282 {
283 CPUState *cs = CPU(cpu);
284 int ret;
285
286 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
287 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
288 if (ret == 0) {
289 return;
290 }
291 }
292
293 kvm_get_fallback_smmu_info(cpu, info);
294 }
295
296 static long getrampagesize(void)
297 {
298 struct statfs fs;
299 int ret;
300
301 if (!mem_path) {
302 /* guest RAM is backed by normal anonymous pages */
303 return getpagesize();
304 }
305
306 do {
307 ret = statfs(mem_path, &fs);
308 } while (ret != 0 && errno == EINTR);
309
310 if (ret != 0) {
311 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
312 strerror(errno));
313 exit(1);
314 }
315
316 #define HUGETLBFS_MAGIC 0x958458f6
317
318 if (fs.f_type != HUGETLBFS_MAGIC) {
319 /* Explicit mempath, but it's ordinary pages */
320 return getpagesize();
321 }
322
323     /* It's a hugetlbfs mount, so return the huge page size */
324 return fs.f_bsize;
325 }
326
327 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
328 {
329 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
330 return true;
331 }
332
333 return (1ul << shift) <= rampgsize;
334 }
335
336 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
337 {
338 static struct kvm_ppc_smmu_info smmu_info;
339 static bool has_smmu_info;
340 CPUPPCState *env = &cpu->env;
341 long rampagesize;
342 int iq, ik, jq, jk;
343
344 /* We only handle page sizes for 64-bit server guests for now */
345 if (!(env->mmu_model & POWERPC_MMU_64)) {
346 return;
347 }
348
349     /* Collect MMU info from the kernel if we haven't already */
350 if (!has_smmu_info) {
351 kvm_get_smmu_info(cpu, &smmu_info);
352 has_smmu_info = true;
353 }
354
355 rampagesize = getrampagesize();
356
357 /* Convert to QEMU form */
358 memset(&env->sps, 0, sizeof(env->sps));
359
360 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
361 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
362 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
363
364 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
365 ksps->page_shift)) {
366 continue;
367 }
368 qsps->page_shift = ksps->page_shift;
369 qsps->slb_enc = ksps->slb_enc;
370 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
371 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
372 ksps->enc[jk].page_shift)) {
373 continue;
374 }
375 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
376 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
377 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
378 break;
379 }
380 }
381 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
382 break;
383 }
384 }
385 env->slb_nr = smmu_info.slb_size;
386 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
387 env->mmu_model |= POWERPC_MMU_1TSEG;
388 } else {
389 env->mmu_model &= ~POWERPC_MMU_1TSEG;
390 }
391 }
392 #else /* defined (TARGET_PPC64) */
393
394 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
395 {
396 }
397
398 #endif /* !defined (TARGET_PPC64) */
399
400 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
401 {
402 return cpu->cpu_index;
403 }
404
405 int kvm_arch_init_vcpu(CPUState *cs)
406 {
407 PowerPCCPU *cpu = POWERPC_CPU(cs);
408 CPUPPCState *cenv = &cpu->env;
409 int ret;
410
411 /* Gather server mmu info from KVM and update the CPU state */
412 kvm_fixup_page_sizes(cpu);
413
414 /* Synchronize sregs with kvm */
415 ret = kvm_arch_sync_sregs(cpu);
416 if (ret) {
417 return ret;
418 }
419
420 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);
421
422 /* Some targets support access to KVM's guest TLB. */
423 switch (cenv->mmu_model) {
424 case POWERPC_MMU_BOOKE206:
425 ret = kvm_booke206_tlb_init(cpu);
426 break;
427 default:
428 break;
429 }
430
431 return ret;
432 }
433
434 void kvm_arch_reset_vcpu(CPUState *cpu)
435 {
436 }
437
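/* Push the complete QEMU-side BookE 2.06 TLB back to KVM: every entry in
 * the shared array set up by kvm_booke206_tlb_init() is marked dirty, so
 * the kernel re-reads all of it. */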
438 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
439 {
440 CPUPPCState *env = &cpu->env;
441 CPUState *cs = CPU(cpu);
442 struct kvm_dirty_tlb dirty_tlb;
443 unsigned char *bitmap;
444 int ret;
445
446 if (!env->kvm_sw_tlb) {
447 return;
448 }
449
450 bitmap = g_malloc((env->nb_tlb + 7) / 8);
451 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
452
453 dirty_tlb.bitmap = (uintptr_t)bitmap;
454 dirty_tlb.num_dirty = env->nb_tlb;
455
456 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
457 if (ret) {
458 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
459 __func__, strerror(-ret));
460 }
461
462 g_free(bitmap);
463 }
464
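/* Read/write a single SPR through the KVM_GET_ONE_REG / KVM_SET_ONE_REG
 * interface.  The register width (32 or 64 bit) is encoded in the ONE_REG
 * id; failures only produce a warning, since not every kernel implements
 * every SPR. */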
465 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
466 {
467 PowerPCCPU *cpu = POWERPC_CPU(cs);
468 CPUPPCState *env = &cpu->env;
469 union {
470 uint32_t u32;
471 uint64_t u64;
472 } val;
473 struct kvm_one_reg reg = {
474 .id = id,
475 .addr = (uintptr_t) &val,
476 };
477 int ret;
478
479 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
480 if (ret != 0) {
481 fprintf(stderr, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
482 spr, strerror(errno));
483 } else {
484 switch (id & KVM_REG_SIZE_MASK) {
485 case KVM_REG_SIZE_U32:
486 env->spr[spr] = val.u32;
487 break;
488
489 case KVM_REG_SIZE_U64:
490 env->spr[spr] = val.u64;
491 break;
492
493 default:
494 /* Don't handle this size yet */
495 abort();
496 }
497 }
498 }
499
500 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
501 {
502 PowerPCCPU *cpu = POWERPC_CPU(cs);
503 CPUPPCState *env = &cpu->env;
504 union {
505 uint32_t u32;
506 uint64_t u64;
507 } val;
508 struct kvm_one_reg reg = {
509 .id = id,
510 .addr = (uintptr_t) &val,
511 };
512 int ret;
513
514 switch (id & KVM_REG_SIZE_MASK) {
515 case KVM_REG_SIZE_U32:
516 val.u32 = env->spr[spr];
517 break;
518
519 case KVM_REG_SIZE_U64:
520 val.u64 = env->spr[spr];
521 break;
522
523 default:
524 /* Don't handle this size yet */
525 abort();
526 }
527
528 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
529 if (ret != 0) {
530 fprintf(stderr, "Warning: Unable to set SPR %d to KVM: %s\n",
531 spr, strerror(errno));
532 }
533 }
534
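/* Sync the FP/VSX/Altivec state with KVM through ONE_REG.  When VSX is
 * available, each vsr[] pair carries the FPR in the first doubleword and
 * the low half of the VSR in the second; without VSX only the FPR part is
 * transferred. */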
535 static int kvm_put_fp(CPUState *cs)
536 {
537 PowerPCCPU *cpu = POWERPC_CPU(cs);
538 CPUPPCState *env = &cpu->env;
539 struct kvm_one_reg reg;
540 int i;
541 int ret;
542
543 if (env->insns_flags & PPC_FLOAT) {
544 uint64_t fpscr = env->fpscr;
545 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
546
547 reg.id = KVM_REG_PPC_FPSCR;
548 reg.addr = (uintptr_t)&fpscr;
549 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
550 if (ret < 0) {
551 dprintf("Unable to set FPSCR to KVM: %s\n", strerror(errno));
552 return ret;
553 }
554
555 for (i = 0; i < 32; i++) {
556 uint64_t vsr[2];
557
558 vsr[0] = float64_val(env->fpr[i]);
559 vsr[1] = env->vsr[i];
560 reg.addr = (uintptr_t) &vsr;
561 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
562
563 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
564 if (ret < 0) {
565 dprintf("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
566 i, strerror(errno));
567 return ret;
568 }
569 }
570 }
571
572 if (env->insns_flags & PPC_ALTIVEC) {
573 reg.id = KVM_REG_PPC_VSCR;
574 reg.addr = (uintptr_t)&env->vscr;
575 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
576 if (ret < 0) {
577 dprintf("Unable to set VSCR to KVM: %s\n", strerror(errno));
578 return ret;
579 }
580
581 for (i = 0; i < 32; i++) {
582 reg.id = KVM_REG_PPC_VR(i);
583 reg.addr = (uintptr_t)&env->avr[i];
584 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
585 if (ret < 0) {
586 dprintf("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
587 return ret;
588 }
589 }
590 }
591
592 return 0;
593 }
594
595 static int kvm_get_fp(CPUState *cs)
596 {
597 PowerPCCPU *cpu = POWERPC_CPU(cs);
598 CPUPPCState *env = &cpu->env;
599 struct kvm_one_reg reg;
600 int i;
601 int ret;
602
603 if (env->insns_flags & PPC_FLOAT) {
604 uint64_t fpscr;
605 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
606
607 reg.id = KVM_REG_PPC_FPSCR;
608 reg.addr = (uintptr_t)&fpscr;
609 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
610 if (ret < 0) {
611 dprintf("Unable to get FPSCR from KVM: %s\n", strerror(errno));
612 return ret;
613 } else {
614 env->fpscr = fpscr;
615 }
616
617 for (i = 0; i < 32; i++) {
618 uint64_t vsr[2];
619
620 reg.addr = (uintptr_t) &vsr;
621 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
622
623 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
624 if (ret < 0) {
625 dprintf("Unable to get %s%d from KVM: %s\n",
626 vsx ? "VSR" : "FPR", i, strerror(errno));
627 return ret;
628 } else {
629 env->fpr[i] = vsr[0];
630 if (vsx) {
631 env->vsr[i] = vsr[1];
632 }
633 }
634 }
635 }
636
637 if (env->insns_flags & PPC_ALTIVEC) {
638 reg.id = KVM_REG_PPC_VSCR;
639 reg.addr = (uintptr_t)&env->vscr;
640 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
641 if (ret < 0) {
642 dprintf("Unable to get VSCR from KVM: %s\n", strerror(errno));
643 return ret;
644 }
645
646 for (i = 0; i < 32; i++) {
647 reg.id = KVM_REG_PPC_VR(i);
648 reg.addr = (uintptr_t)&env->avr[i];
649 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
650 if (ret < 0) {
651 dprintf("Unable to get VR%d from KVM: %s\n",
652 i, strerror(errno));
653 return ret;
654 }
655 }
656 }
657
658 return 0;
659 }
660
661 #if defined(TARGET_PPC64)
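/* The VPA (Virtual Processor Area), SLB shadow buffer and dispatch trace
 * log are per-vCPU areas that a PAPR guest registers with the hypervisor.
 * With PAPR mode enabled the kernel tracks their guest addresses, so they
 * have to be fetched and restored through ONE_REG for migration to work. */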
662 static int kvm_get_vpa(CPUState *cs)
663 {
664 PowerPCCPU *cpu = POWERPC_CPU(cs);
665 CPUPPCState *env = &cpu->env;
666 struct kvm_one_reg reg;
667 int ret;
668
669 reg.id = KVM_REG_PPC_VPA_ADDR;
670 reg.addr = (uintptr_t)&env->vpa_addr;
671 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
672 if (ret < 0) {
673 dprintf("Unable to get VPA address from KVM: %s\n", strerror(errno));
674 return ret;
675 }
676
677 assert((uintptr_t)&env->slb_shadow_size
678 == ((uintptr_t)&env->slb_shadow_addr + 8));
679 reg.id = KVM_REG_PPC_VPA_SLB;
680 reg.addr = (uintptr_t)&env->slb_shadow_addr;
681 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
682 if (ret < 0) {
683 dprintf("Unable to get SLB shadow state from KVM: %s\n",
684 strerror(errno));
685 return ret;
686 }
687
688 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
689 reg.id = KVM_REG_PPC_VPA_DTL;
690 reg.addr = (uintptr_t)&env->dtl_addr;
691 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
692 if (ret < 0) {
693 dprintf("Unable to get dispatch trace log state from KVM: %s\n",
694 strerror(errno));
695 return ret;
696 }
697
698 return 0;
699 }
700
701 static int kvm_put_vpa(CPUState *cs)
702 {
703 PowerPCCPU *cpu = POWERPC_CPU(cs);
704 CPUPPCState *env = &cpu->env;
705 struct kvm_one_reg reg;
706 int ret;
707
708 /* SLB shadow or DTL can't be registered unless a master VPA is
709 * registered. That means when restoring state, if a VPA *is*
710 * registered, we need to set that up first. If not, we need to
711 * deregister the others before deregistering the master VPA */
712 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
713
714 if (env->vpa_addr) {
715 reg.id = KVM_REG_PPC_VPA_ADDR;
716 reg.addr = (uintptr_t)&env->vpa_addr;
717 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
718 if (ret < 0) {
719 dprintf("Unable to set VPA address to KVM: %s\n", strerror(errno));
720 return ret;
721 }
722 }
723
724 assert((uintptr_t)&env->slb_shadow_size
725 == ((uintptr_t)&env->slb_shadow_addr + 8));
726 reg.id = KVM_REG_PPC_VPA_SLB;
727 reg.addr = (uintptr_t)&env->slb_shadow_addr;
728 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
729 if (ret < 0) {
730 dprintf("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
731 return ret;
732 }
733
734 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
735 reg.id = KVM_REG_PPC_VPA_DTL;
736 reg.addr = (uintptr_t)&env->dtl_addr;
737 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
738 if (ret < 0) {
739 dprintf("Unable to set dispatch trace log state to KVM: %s\n",
740 strerror(errno));
741 return ret;
742 }
743
744 if (!env->vpa_addr) {
745 reg.id = KVM_REG_PPC_VPA_ADDR;
746 reg.addr = (uintptr_t)&env->vpa_addr;
747 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
748 if (ret < 0) {
749 dprintf("Unable to set VPA address to KVM: %s\n", strerror(errno));
750 return ret;
751 }
752 }
753
754 return 0;
755 }
756 #endif /* TARGET_PPC64 */
757
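/* Push all of QEMU's per-CPU state (GPRs, sregs, SPRs exposed via ONE_REG,
 * FP/VMX, VPA) into KVM.  kvm_arch_get_registers() below is the mirror
 * image that pulls the same state back out of the kernel. */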
758 int kvm_arch_put_registers(CPUState *cs, int level)
759 {
760 PowerPCCPU *cpu = POWERPC_CPU(cs);
761 CPUPPCState *env = &cpu->env;
762 struct kvm_regs regs;
763 int ret;
764 int i;
765
766 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
767 if (ret < 0) {
768 return ret;
769 }
770
771 regs.ctr = env->ctr;
772 regs.lr = env->lr;
773 regs.xer = cpu_read_xer(env);
774 regs.msr = env->msr;
775 regs.pc = env->nip;
776
777 regs.srr0 = env->spr[SPR_SRR0];
778 regs.srr1 = env->spr[SPR_SRR1];
779
780 regs.sprg0 = env->spr[SPR_SPRG0];
781 regs.sprg1 = env->spr[SPR_SPRG1];
782 regs.sprg2 = env->spr[SPR_SPRG2];
783 regs.sprg3 = env->spr[SPR_SPRG3];
784 regs.sprg4 = env->spr[SPR_SPRG4];
785 regs.sprg5 = env->spr[SPR_SPRG5];
786 regs.sprg6 = env->spr[SPR_SPRG6];
787 regs.sprg7 = env->spr[SPR_SPRG7];
788
789 regs.pid = env->spr[SPR_BOOKE_PID];
790
791     for (i = 0; i < 32; i++)
792 regs.gpr[i] = env->gpr[i];
793
794 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
795 if (ret < 0)
796 return ret;
797
798 kvm_put_fp(cs);
799
800 if (env->tlb_dirty) {
801 kvm_sw_tlb_put(cpu);
802 env->tlb_dirty = false;
803 }
804
805 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
806 struct kvm_sregs sregs;
807
808 sregs.pvr = env->spr[SPR_PVR];
809
810 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
811
812 /* Sync SLB */
813 #ifdef TARGET_PPC64
814 for (i = 0; i < 64; i++) {
815 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
816 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
817 }
818 #endif
819
820 /* Sync SRs */
821 for (i = 0; i < 16; i++) {
822 sregs.u.s.ppc32.sr[i] = env->sr[i];
823 }
824
825 /* Sync BATs */
826 for (i = 0; i < 8; i++) {
827             /* Beware. We have to swap the upper and lower 32-bit halves here */
828 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
829 | env->DBAT[1][i];
830 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
831 | env->IBAT[1][i];
832 }
833
834 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
835 if (ret) {
836 return ret;
837 }
838 }
839
840 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
841 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
842 }
843
844 if (cap_one_reg) {
845 int i;
846
847         /* We deliberately ignore errors here: for kernels which have
848          * the ONE_REG calls but don't support the specific
849          * registers, there's a reasonable chance things will still
850          * work, at least until we try to migrate. */
851 for (i = 0; i < 1024; i++) {
852 uint64_t id = env->spr_cb[i].one_reg_id;
853
854 if (id != 0) {
855 kvm_put_one_spr(cs, id, i);
856 }
857 }
858
859 #ifdef TARGET_PPC64
860 if (cap_papr) {
861 if (kvm_put_vpa(cs) < 0) {
862 dprintf("Warning: Unable to set VPA information to KVM\n");
863 }
864 }
865 #endif /* TARGET_PPC64 */
866 }
867
868 return ret;
869 }
870
871 int kvm_arch_get_registers(CPUState *cs)
872 {
873 PowerPCCPU *cpu = POWERPC_CPU(cs);
874 CPUPPCState *env = &cpu->env;
875 struct kvm_regs regs;
876 struct kvm_sregs sregs;
877 uint32_t cr;
878 int i, ret;
879
880 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
881 if (ret < 0)
882 return ret;
883
884 cr = regs.cr;
885 for (i = 7; i >= 0; i--) {
886 env->crf[i] = cr & 15;
887 cr >>= 4;
888 }
889
890 env->ctr = regs.ctr;
891 env->lr = regs.lr;
892 cpu_write_xer(env, regs.xer);
893 env->msr = regs.msr;
894 env->nip = regs.pc;
895
896 env->spr[SPR_SRR0] = regs.srr0;
897 env->spr[SPR_SRR1] = regs.srr1;
898
899 env->spr[SPR_SPRG0] = regs.sprg0;
900 env->spr[SPR_SPRG1] = regs.sprg1;
901 env->spr[SPR_SPRG2] = regs.sprg2;
902 env->spr[SPR_SPRG3] = regs.sprg3;
903 env->spr[SPR_SPRG4] = regs.sprg4;
904 env->spr[SPR_SPRG5] = regs.sprg5;
905 env->spr[SPR_SPRG6] = regs.sprg6;
906 env->spr[SPR_SPRG7] = regs.sprg7;
907
908 env->spr[SPR_BOOKE_PID] = regs.pid;
909
910     for (i = 0; i < 32; i++)
911 env->gpr[i] = regs.gpr[i];
912
913 kvm_get_fp(cs);
914
915 if (cap_booke_sregs) {
916 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
917 if (ret < 0) {
918 return ret;
919 }
920
921 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
922 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
923 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
924 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
925 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
926 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
927 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
928 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
929 env->spr[SPR_DECR] = sregs.u.e.dec;
930 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
931 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
932 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
933 }
934
935 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
936 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
937 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
938 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
939 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
940 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
941 }
942
943 if (sregs.u.e.features & KVM_SREGS_E_64) {
944 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
945 }
946
947 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
948 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
949 }
950
951 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
952 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
953 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
954 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
955 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
956 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
957 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
958 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
959 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
960 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
961 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
962 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
963 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
964 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
965 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
966 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
967 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
968
969 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
970 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
971 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
972 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
973 }
974
975 if (sregs.u.e.features & KVM_SREGS_E_PM) {
976 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
977 }
978
979 if (sregs.u.e.features & KVM_SREGS_E_PC) {
980 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
981 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
982 }
983 }
984
985 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
986 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
987 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
988 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
989 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
990 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
991 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
992 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
993 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
994 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
995 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
996 }
997
998 if (sregs.u.e.features & KVM_SREGS_EXP) {
999 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1000 }
1001
1002 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1003 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1004 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1005 }
1006
1007 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1008 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1009 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1010 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1011
1012 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1013 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1014 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1015 }
1016 }
1017 }
1018
1019 if (cap_segstate) {
1020 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1021 if (ret < 0) {
1022 return ret;
1023 }
1024
1025 ppc_store_sdr1(env, sregs.u.s.sdr1);
1026
1027 /* Sync SLB */
1028 #ifdef TARGET_PPC64
1029 for (i = 0; i < 64; i++) {
1030 ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
1031 sregs.u.s.ppc64.slb[i].slbv);
1032 }
1033 #endif
1034
1035 /* Sync SRs */
1036 for (i = 0; i < 16; i++) {
1037 env->sr[i] = sregs.u.s.ppc32.sr[i];
1038 }
1039
1040 /* Sync BATs */
1041 for (i = 0; i < 8; i++) {
1042 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1043 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1044 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1045 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1046 }
1047 }
1048
1049 if (cap_hior) {
1050 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1051 }
1052
1053 if (cap_one_reg) {
1054 int i;
1055
1056         /* We deliberately ignore errors here: for kernels which have
1057          * the ONE_REG calls but don't support the specific
1058          * registers, there's a reasonable chance things will still
1059          * work, at least until we try to migrate. */
1060 for (i = 0; i < 1024; i++) {
1061 uint64_t id = env->spr_cb[i].one_reg_id;
1062
1063 if (id != 0) {
1064 kvm_get_one_spr(cs, id, i);
1065 }
1066 }
1067
1068 #ifdef TARGET_PPC64
1069 if (cap_papr) {
1070 if (kvm_get_vpa(cs) < 0) {
1071 dprintf("Warning: Unable to get VPA information from KVM\n");
1072 }
1073 }
1074 #endif
1075 }
1076
1077 return 0;
1078 }
1079
1080 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1081 {
1082 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1083
1084 if (irq != PPC_INTERRUPT_EXT) {
1085 return 0;
1086 }
1087
1088 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1089 return 0;
1090 }
1091
1092 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1093
1094 return 0;
1095 }
1096
1097 #if defined(TARGET_PPCEMB)
1098 #define PPC_INPUT_INT PPC40x_INPUT_INT
1099 #elif defined(TARGET_PPC64)
1100 #define PPC_INPUT_INT PPC970_INPUT_INT
1101 #else
1102 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1103 #endif
1104
1105 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1106 {
1107 PowerPCCPU *cpu = POWERPC_CPU(cs);
1108 CPUPPCState *env = &cpu->env;
1109 int r;
1110 unsigned irq;
1111
1112 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1113 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1114 if (!cap_interrupt_level &&
1115 run->ready_for_interrupt_injection &&
1116 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1117 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1118 {
1119 /* For now KVM disregards the 'irq' argument. However, in the
1120 * future KVM could cache it in-kernel to avoid a heavyweight exit
1121 * when reading the UIC.
1122 */
1123 irq = KVM_INTERRUPT_SET;
1124
1125 dprintf("injected interrupt %d\n", irq);
1126 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1127 if (r < 0) {
1128 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1129 }
1130
1131 /* Always wake up soon in case the interrupt was level based */
1132 qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
1133 (get_ticks_per_sec() / 50));
1134 }
1135
1136 /* We don't know if there are more interrupts pending after this. However,
1137 * the guest will return to userspace in the course of handling this one
1138      * anyway, so we will get a chance to deliver the rest. */
1139 }
1140
1141 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1142 {
1143 }
1144
1145 int kvm_arch_process_async_events(CPUState *cs)
1146 {
1147 return cs->halted;
1148 }
1149
1150 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1151 {
1152 CPUState *cs = CPU(cpu);
1153 CPUPPCState *env = &cpu->env;
1154
1155 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1156 cs->halted = 1;
1157 env->exception_index = EXCP_HLT;
1158 }
1159
1160 return 0;
1161 }
1162
1163 /* map dcr access to existing qemu dcr emulation */
1164 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1165 {
1166 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1167 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1168
1169 return 0;
1170 }
1171
1172 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1173 {
1174 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1175 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1176
1177 return 0;
1178 }
1179
1180 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1181 {
1182 PowerPCCPU *cpu = POWERPC_CPU(cs);
1183 CPUPPCState *env = &cpu->env;
1184 int ret;
1185
1186 switch (run->exit_reason) {
1187 case KVM_EXIT_DCR:
1188 if (run->dcr.is_write) {
1189 dprintf("handle dcr write\n");
1190 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1191 } else {
1192 dprintf("handle dcr read\n");
1193 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1194 }
1195 break;
1196 case KVM_EXIT_HLT:
1197 dprintf("handle halt\n");
1198 ret = kvmppc_handle_halt(cpu);
1199 break;
1200 #if defined(TARGET_PPC64)
1201 case KVM_EXIT_PAPR_HCALL:
1202 dprintf("handle PAPR hypercall\n");
1203 run->papr_hcall.ret = spapr_hypercall(cpu,
1204 run->papr_hcall.nr,
1205 run->papr_hcall.args);
1206 ret = 0;
1207 break;
1208 #endif
1209 case KVM_EXIT_EPR:
1210 dprintf("handle epr\n");
1211 run->epr.epr = ldl_phys(env->mpic_iack);
1212 ret = 0;
1213 break;
1214 case KVM_EXIT_WATCHDOG:
1215 dprintf("handle watchdog expiry\n");
1216 watchdog_perform_action();
1217 ret = 0;
1218 break;
1219
1220 default:
1221 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1222 ret = -1;
1223 break;
1224 }
1225
1226 return ret;
1227 }
1228
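/* BookE watchdog plumbing: TSR bits are set/cleared and TCR is written
 * through their dedicated ONE_REG ids, keeping the in-kernel watchdog
 * emulation (KVM_CAP_PPC_BOOKE_WATCHDOG, enabled below) in sync with the
 * guest-visible timer registers. */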
1229 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1230 {
1231 CPUState *cs = CPU(cpu);
1232 uint32_t bits = tsr_bits;
1233 struct kvm_one_reg reg = {
1234 .id = KVM_REG_PPC_OR_TSR,
1235 .addr = (uintptr_t) &bits,
1236 };
1237
1238 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1239 }
1240
1241 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1242 {
1243
1244 CPUState *cs = CPU(cpu);
1245 uint32_t bits = tsr_bits;
1246 struct kvm_one_reg reg = {
1247 .id = KVM_REG_PPC_CLEAR_TSR,
1248 .addr = (uintptr_t) &bits,
1249 };
1250
1251 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1252 }
1253
1254 int kvmppc_set_tcr(PowerPCCPU *cpu)
1255 {
1256 CPUState *cs = CPU(cpu);
1257 CPUPPCState *env = &cpu->env;
1258 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1259
1260 struct kvm_one_reg reg = {
1261 .id = KVM_REG_PPC_TCR,
1262 .addr = (uintptr_t) &tcr,
1263 };
1264
1265 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1266 }
1267
1268 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1269 {
1270 CPUState *cs = CPU(cpu);
1271 struct kvm_enable_cap encap = {};
1272 int ret;
1273
1274 if (!kvm_enabled()) {
1275 return -1;
1276 }
1277
1278 if (!cap_ppc_watchdog) {
1279         printf("warning: KVM does not support watchdog\n");
1280 return -1;
1281 }
1282
1283 encap.cap = KVM_CAP_PPC_BOOKE_WATCHDOG;
1284 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
1285 if (ret < 0) {
1286 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1287 __func__, strerror(-ret));
1288 return ret;
1289 }
1290
1291 return ret;
1292 }
1293
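/* Scan /proc/cpuinfo for a line starting with 'field' and copy that line
 * into 'value'.  Returns 0 on success, -1 if the field isn't present. */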
1294 static int read_cpuinfo(const char *field, char *value, int len)
1295 {
1296 FILE *f;
1297 int ret = -1;
1298 int field_len = strlen(field);
1299 char line[512];
1300
1301 f = fopen("/proc/cpuinfo", "r");
1302 if (!f) {
1303 return -1;
1304 }
1305
1306 do {
1307         if (!fgets(line, sizeof(line), f)) {
1308 break;
1309 }
1310 if (!strncmp(line, field, field_len)) {
1311 pstrcpy(value, len, line);
1312 ret = 0;
1313 break;
1314 }
1315     } while (*line);
1316
1317 fclose(f);
1318
1319 return ret;
1320 }
1321
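/* Host timebase frequency, parsed from the "timebase" line of
 * /proc/cpuinfo (on a POWER host this typically looks like
 * "timebase : 512000000").  Falls back to get_ticks_per_sec() if the
 * line can't be found or parsed. */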
1322 uint32_t kvmppc_get_tbfreq(void)
1323 {
1324 char line[512];
1325 char *ns;
1326 uint32_t retval = get_ticks_per_sec();
1327
1328 if (read_cpuinfo("timebase", line, sizeof(line))) {
1329 return retval;
1330 }
1331
1332 if (!(ns = strchr(line, ':'))) {
1333 return retval;
1334 }
1335
1336 ns++;
1337
1338 retval = atoi(ns);
1339 return retval;
1340 }
1341
1342 /* Try to find a device tree node for a CPU with a clock-frequency property */
1343 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1344 {
1345 struct dirent *dirp;
1346 DIR *dp;
1347
1348 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1349 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1350 return -1;
1351 }
1352
1353 buf[0] = '\0';
1354 while ((dirp = readdir(dp)) != NULL) {
1355 FILE *f;
1356 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1357 dirp->d_name);
1358 f = fopen(buf, "r");
1359 if (f) {
1360 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1361 fclose(f);
1362 break;
1363 }
1364 buf[0] = '\0';
1365 }
1366 closedir(dp);
1367 if (buf[0] == '\0') {
1368 printf("Unknown host!\n");
1369 return -1;
1370 }
1371
1372 return 0;
1373 }
1374
1375 /* Read a CPU node property from the host device tree that's a single
1376  * integer (32-bit or 64-bit). Returns -1 if the property can't
1377  * be found or opened, and 0 if the format of its contents isn't
1378  * understood. */
1379 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1380 {
1381 char buf[PATH_MAX];
1382 union {
1383 uint32_t v32;
1384 uint64_t v64;
1385 } u;
1386 FILE *f;
1387 int len;
1388
1389 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1390 return -1;
1391 }
1392
1393 strncat(buf, "/", sizeof(buf) - strlen(buf));
1394 strncat(buf, propname, sizeof(buf) - strlen(buf));
1395
1396 f = fopen(buf, "rb");
1397 if (!f) {
1398 return -1;
1399 }
1400
1401 len = fread(&u, 1, sizeof(u), f);
1402 fclose(f);
1403 switch (len) {
1404 case 4:
1405 /* property is a 32-bit quantity */
1406 return be32_to_cpu(u.v32);
1407 case 8:
1408 return be64_to_cpu(u.v64);
1409 }
1410
1411 return 0;
1412 }
1413
1414 uint64_t kvmppc_get_clockfreq(void)
1415 {
1416 return kvmppc_read_int_cpu_dt("clock-frequency");
1417 }
1418
1419 uint32_t kvmppc_get_vmx(void)
1420 {
1421 return kvmppc_read_int_cpu_dt("ibm,vmx");
1422 }
1423
1424 uint32_t kvmppc_get_dfp(void)
1425 {
1426 return kvmppc_read_int_cpu_dt("ibm,dfp");
1427 }
1428
1429 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1430 {
1431 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1432 CPUState *cs = CPU(cpu);
1433
1434 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1435 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1436 return 0;
1437 }
1438
1439 return 1;
1440 }
1441
1442 int kvmppc_get_hasidle(CPUPPCState *env)
1443 {
1444 struct kvm_ppc_pvinfo pvinfo;
1445
1446 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1447 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1448 return 1;
1449 }
1450
1451 return 0;
1452 }
1453
1454 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1455 {
1456 uint32_t *hc = (uint32_t*)buf;
1457 struct kvm_ppc_pvinfo pvinfo;
1458
1459 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1460 memcpy(buf, pvinfo.hcall, buf_len);
1461 return 0;
1462 }
1463
1464 /*
1465 * Fallback to always fail hypercalls:
1466 *
1467 * li r3, -1
1468 * nop
1469 * nop
1470 * nop
1471 */
1472
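    /* 0x3860ffff is "li r3, -1"; 0x60000000 is the canonical nop
     * (ori r0, r0, 0), matching the sequence described above. */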
1473 hc[0] = 0x3860ffff;
1474 hc[1] = 0x60000000;
1475 hc[2] = 0x60000000;
1476 hc[3] = 0x60000000;
1477
1478 return 0;
1479 }
1480
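/* Switch this vCPU into PAPR (pseries) mode by enabling KVM_CAP_PPC_PAPR.
 * Aborts if the running kernel doesn't know the capability, and flips
 * cap_papr so that register sync starts including the VPA state. */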
1481 void kvmppc_set_papr(PowerPCCPU *cpu)
1482 {
1483 CPUPPCState *env = &cpu->env;
1484 CPUState *cs = CPU(cpu);
1485 struct kvm_enable_cap cap = {};
1486 int ret;
1487
1488 cap.cap = KVM_CAP_PPC_PAPR;
1489 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1490
1491 if (ret) {
1492 cpu_abort(env, "This KVM version does not support PAPR\n");
1493 }
1494
1495 /* Update the capability flag so we sync the right information
1496 * with kvm */
1497 cap_papr = 1;
1498 }
1499
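/* Enable or disable MPIC proxy mode via KVM_CAP_PPC_EPR.  With it enabled,
 * external interrupt acknowledge comes back to QEMU as a KVM_EXIT_EPR exit,
 * which kvm_arch_handle_exit() answers by reading the MPIC IACK register. */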
1500 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1501 {
1502 CPUPPCState *env = &cpu->env;
1503 CPUState *cs = CPU(cpu);
1504 struct kvm_enable_cap cap = {};
1505 int ret;
1506
1507 cap.cap = KVM_CAP_PPC_EPR;
1508 cap.args[0] = mpic_proxy;
1509 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1510
1511 if (ret && mpic_proxy) {
1512 cpu_abort(env, "This KVM version does not support EPR\n");
1513 }
1514 }
1515
1516 int kvmppc_smt_threads(void)
1517 {
1518 return cap_ppc_smt ? cap_ppc_smt : 1;
1519 }
1520
1521 #ifdef TARGET_PPC64
1522 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1523 {
1524 void *rma;
1525 off_t size;
1526 int fd;
1527 struct kvm_allocate_rma ret;
1528 MemoryRegion *rma_region;
1529
1530 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1531 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1532 * not necessary on this hardware
1533 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1534 *
1535 * FIXME: We should allow the user to force contiguous RMA
1536 * allocation in the cap_ppc_rma==1 case.
1537 */
1538 if (cap_ppc_rma < 2) {
1539 return 0;
1540 }
1541
1542 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1543 if (fd < 0) {
1544 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1545 strerror(errno));
1546 return -1;
1547 }
1548
1549 size = MIN(ret.rma_size, 256ul << 20);
1550
1551 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1552 if (rma == MAP_FAILED) {
1553 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1554 return -1;
1555 };
1556
1557 rma_region = g_new(MemoryRegion, 1);
1558 memory_region_init_ram_ptr(rma_region, name, size, rma);
1559 vmstate_register_ram_global(rma_region);
1560 memory_region_add_subregion(sysmem, 0, rma_region);
1561
1562 return size;
1563 }
1564
1565 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1566 {
1567 struct kvm_ppc_smmu_info info;
1568 long rampagesize, best_page_shift;
1569 int i;
1570
1571 if (cap_ppc_rma >= 2) {
1572 return current_size;
1573 }
1574
1575 /* Find the largest hardware supported page size that's less than
1576 * or equal to the (logical) backing page size of guest RAM */
1577 kvm_get_smmu_info(ppc_env_get_cpu(first_cpu), &info);
1578 rampagesize = getrampagesize();
1579 best_page_shift = 0;
1580
1581 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1582 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
1583
1584 if (!sps->page_shift) {
1585 continue;
1586 }
1587
1588 if ((sps->page_shift > best_page_shift)
1589 && ((1UL << sps->page_shift) <= rampagesize)) {
1590 best_page_shift = sps->page_shift;
1591 }
1592 }
1593
1594 return MIN(current_size,
1595 1ULL << (best_page_shift + hash_shift - 7));
1596 }
1597 #endif
1598
1599 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1600 {
1601 struct kvm_create_spapr_tce args = {
1602 .liobn = liobn,
1603 .window_size = window_size,
1604 };
1605 long len;
1606 int fd;
1607 void *table;
1608
1609 /* Must set fd to -1 so we don't try to munmap when called for
1610 * destroying the table, which the upper layers -will- do
1611 */
1612 *pfd = -1;
1613 if (!cap_spapr_tce) {
1614 return NULL;
1615 }
1616
1617 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1618 if (fd < 0) {
1619 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1620 liobn);
1621 return NULL;
1622 }
1623
1624 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1625 /* FIXME: round this up to page size */
1626
1627 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1628 if (table == MAP_FAILED) {
1629 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1630 liobn);
1631 close(fd);
1632 return NULL;
1633 }
1634
1635 *pfd = fd;
1636 return table;
1637 }
1638
1639 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1640 {
1641 long len;
1642
1643 if (fd < 0) {
1644 return -1;
1645 }
1646
1647 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
1648 if ((munmap(table, len) < 0) ||
1649 (close(fd) < 0)) {
1650 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1651 strerror(errno));
1652 /* Leak the table */
1653 }
1654
1655 return 0;
1656 }
1657
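/* (Re)allocate the guest hash page table.  Returns 0 when QEMU should
 * allocate the HTAB itself (TCG or PR KVM), the log2 size of the
 * kernel-allocated HTAB when KVM owns it, or a negative errno on error. */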
1658 int kvmppc_reset_htab(int shift_hint)
1659 {
1660 uint32_t shift = shift_hint;
1661
1662 if (!kvm_enabled()) {
1663 /* Full emulation, tell caller to allocate htab itself */
1664 return 0;
1665 }
1666 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1667 int ret;
1668 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1669 if (ret == -ENOTTY) {
1670 /* At least some versions of PR KVM advertise the
1671 * capability, but don't implement the ioctl(). Oops.
1672 * Return 0 so that we allocate the htab in qemu, as is
1673 * correct for PR. */
1674 return 0;
1675 } else if (ret < 0) {
1676 return ret;
1677 }
1678 return shift;
1679 }
1680
1681     /* We have a kernel that predates the htab reset calls.  For PR
1682      * KVM, we need to allocate the htab ourselves; an HV KVM of
1683      * this era will already have allocated a 16MB fixed-size hash
1684      * table.  Kernels of this era have the GET_PVINFO capability
1685      * only on PR, so we use this hack to determine the right
1686      * answer. */
1687 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1688 /* PR - tell caller to allocate htab */
1689 return 0;
1690 } else {
1691 /* HV - assume 16MB kernel allocated htab */
1692 return 24;
1693 }
1694 }
1695
1696 static inline uint32_t mfpvr(void)
1697 {
1698 uint32_t pvr;
1699
1700 asm ("mfpvr %0"
1701 : "=r"(pvr));
1702 return pvr;
1703 }
1704
1705 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
1706 {
1707 if (on) {
1708 *word |= flags;
1709 } else {
1710 *word &= ~flags;
1711 }
1712 }
1713
1714 static void kvmppc_host_cpu_initfn(Object *obj)
1715 {
1716 assert(kvm_enabled());
1717 }
1718
1719 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1720 {
1721 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1722 uint32_t vmx = kvmppc_get_vmx();
1723 uint32_t dfp = kvmppc_get_dfp();
1724 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
1725 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
1726
1727 /* Now fix up the class with information we can query from the host */
1728
1729 if (vmx != -1) {
1730 /* Only override when we know what the host supports */
1731 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1732 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1733 }
1734 if (dfp != -1) {
1735 /* Only override when we know what the host supports */
1736 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1737 }
1738
1739 if (dcache_size != -1) {
1740 pcc->l1_dcache_size = dcache_size;
1741 }
1742
1743 if (icache_size != -1) {
1744 pcc->l1_icache_size = icache_size;
1745 }
1746 }
1747
1748 int kvmppc_fixup_cpu(PowerPCCPU *cpu)
1749 {
1750 CPUState *cs = CPU(cpu);
1751 int smt;
1752
1753 /* Adjust cpu index for SMT */
1754 smt = kvmppc_smt_threads();
1755 cs->cpu_index = (cs->cpu_index / smp_threads) * smt
1756 + (cs->cpu_index % smp_threads);
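    /* e.g. with smp_threads == 2 and a host SMT capability of 8, guest
     * CPUs 0,1,2,3 map to KVM vcpu ids 0,1,8,9, so each guest core starts
     * on a host core boundary. */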
1757
1758 return 0;
1759 }
1760
1761 bool kvmppc_has_cap_epr(void)
1762 {
1763 return cap_epr;
1764 }
1765
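/* Register the "host" CPU class: look up the PowerPC CPU class matching
 * the host PVR (mfpvr) and derive TYPE_HOST_POWERPC_CPU from it, so that
 * "-cpu host" picks up the real hardware's features. */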
1766 static int kvm_ppc_register_host_cpu_type(void)
1767 {
1768 TypeInfo type_info = {
1769 .name = TYPE_HOST_POWERPC_CPU,
1770 .instance_init = kvmppc_host_cpu_initfn,
1771 .class_init = kvmppc_host_cpu_class_init,
1772 };
1773 uint32_t host_pvr = mfpvr();
1774 PowerPCCPUClass *pvr_pcc;
1775
1776 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1777 if (pvr_pcc == NULL) {
1778 return -1;
1779 }
1780 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1781 type_register(&type_info);
1782 return 0;
1783 }
1784
1785
1786 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1787 {
1788 return true;
1789 }
1790
1791 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1792 {
1793 return 1;
1794 }
1795
1796 int kvm_arch_on_sigbus(int code, void *addr)
1797 {
1798 return 1;
1799 }
1800
1801 void kvm_arch_init_irq_routing(KVMState *s)
1802 {
1803 }