target-ppc/kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
22
23 #include <linux/kvm.h>
24
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "hw/sysbus.h"
34 #include "hw/spapr.h"
35 #include "mmu-hash64.h"
36
37 #include "hw/sysbus.h"
38 #include "hw/spapr.h"
39 #include "hw/spapr_vio.h"
40
41 //#define DEBUG_KVM
42
43 #ifdef DEBUG_KVM
44 #define dprintf(fmt, ...) \
45 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
46 #else
47 #define dprintf(fmt, ...) \
48 do { } while (0)
49 #endif
50
51 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
52
53 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
54 KVM_CAP_LAST_INFO
55 };
56
57 static int cap_interrupt_unset = false;
58 static int cap_interrupt_level = false;
59 static int cap_segstate;
60 static int cap_booke_sregs;
61 static int cap_ppc_smt;
62 static int cap_ppc_rma;
63 static int cap_spapr_tce;
64 static int cap_hior;
65 static int cap_one_reg;
66
67 /* XXX We have a race condition where we actually have a level triggered
68 * interrupt, but the infrastructure can't expose that yet, so the guest
69 * takes the interrupt but ignores it, goes to sleep and never gets notified
70 * that there's still an interrupt pending.
71 *
72 * As a quick workaround, let's just wake up again 20 ms after we injected
73 * an interrupt. That way we can ensure that we always reinject interrupts
74 * in case the guest swallowed them.
75 */
76 static QEMUTimer *idle_timer;
77
78 static void kvm_kick_cpu(void *opaque)
79 {
80 PowerPCCPU *cpu = opaque;
81
82 qemu_cpu_kick(CPU(cpu));
83 }
84
85 static int kvm_ppc_register_host_cpu_type(void);
86
87 int kvm_arch_init(KVMState *s)
88 {
89 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
90 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
91 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
92 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
93 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
94 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
95 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
96 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
97 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
98
99 if (!cap_interrupt_level) {
100 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
101 "VM to stall at times!\n");
102 }
103
104 kvm_ppc_register_host_cpu_type();
105
106 return 0;
107 }
108
109 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
110 {
111 CPUPPCState *cenv = &cpu->env;
112 CPUState *cs = CPU(cpu);
113 struct kvm_sregs sregs;
114 int ret;
115
116 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
117 /* What we're really trying to say is "if we're on BookE, we use
118 the native PVR for now". This is the only sane way to check
119 it though, so we may mislead users into thinking they can run
120 BookE guests on BookS. Let's hope nobody is daring enough to try :) */
121 return 0;
122 } else {
123 if (!cap_segstate) {
124 fprintf(stderr, "kvm error: missing PVR setting capability\n");
125 return -ENOSYS;
126 }
127 }
128
129 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
130 if (ret) {
131 return ret;
132 }
133
134 sregs.pvr = cenv->spr[SPR_PVR];
135 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
136 }
137
138 /* Set up a shared TLB array with KVM */
139 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
140 {
141 CPUPPCState *env = &cpu->env;
142 CPUState *cs = CPU(cpu);
143 struct kvm_book3e_206_tlb_params params = {};
144 struct kvm_config_tlb cfg = {};
145 struct kvm_enable_cap encap = {};
146 unsigned int entries = 0;
147 int ret, i;
148
149 if (!kvm_enabled() ||
150 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
151 return 0;
152 }
153
154 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
155
156 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
157 params.tlb_sizes[i] = booke206_tlb_size(env, i);
158 params.tlb_ways[i] = booke206_tlb_ways(env, i);
159 entries += params.tlb_sizes[i];
160 }
161
162 assert(entries == env->nb_tlb);
163 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
164
165 env->tlb_dirty = true;
166
167 cfg.array = (uintptr_t)env->tlb.tlbm;
168 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
169 cfg.params = (uintptr_t)&params;
170 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
171
172 encap.cap = KVM_CAP_SW_TLB;
173 encap.args[0] = (uintptr_t)&cfg;
174
175 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
176 if (ret < 0) {
177 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
178 __func__, strerror(-ret));
179 return ret;
180 }
181
182 env->kvm_sw_tlb = true;
183 return 0;
184 }
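/* Note (descriptive comment): once KVM_CAP_SW_TLB is enabled above,
 * env->tlb.tlbm is shared with the kernel. QEMU only marks the array dirty
 * (env->tlb_dirty) and pushes it back with KVM_DIRTY_TLB from
 * kvm_sw_tlb_put() before the next vcpu run; see kvm_arch_put_registers()
 * below. */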
185
186
187 #if defined(TARGET_PPC64)
188 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
189 struct kvm_ppc_smmu_info *info)
190 {
191 CPUPPCState *env = &cpu->env;
192 CPUState *cs = CPU(cpu);
193
194 memset(info, 0, sizeof(*info));
195
196 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
197 * need to "guess" what the supported page sizes are.
198 *
199 * For that to work we make a few assumptions:
200 *
201 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
202 * KVM which only supports 4K and 16M pages, but supports them
203 * regardless of the backing store characteristics. We also don't
204 * support 1T segments.
205 *
206 * This is safe because if HV KVM ever supports that capability or
207 * PR KVM grows support for more page/segment sizes, those versions
208 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
209 * will not hit this fallback.
210 *
211 * - Else we are running HV KVM. This means we only support page
212 * sizes that fit in the backing store. Additionally we only
213 * advertise 64K pages if the processor is ARCH 2.06 and we assume
214 * P7 encodings for the SLB and hash table. Here too, we assume
215 * support for any newer processor will mean a kernel that
216 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
217 * this fallback.
218 */
219 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
220 /* No flags */
221 info->flags = 0;
222 info->slb_size = 64;
223
224 /* Standard 4k base page size segment */
225 info->sps[0].page_shift = 12;
226 info->sps[0].slb_enc = 0;
227 info->sps[0].enc[0].page_shift = 12;
228 info->sps[0].enc[0].pte_enc = 0;
229
230 /* Standard 16M large page size segment */
231 info->sps[1].page_shift = 24;
232 info->sps[1].slb_enc = SLB_VSID_L;
233 info->sps[1].enc[0].page_shift = 24;
234 info->sps[1].enc[0].pte_enc = 0;
235 } else {
236 int i = 0;
237
238 /* HV KVM has backing store size restrictions */
239 info->flags = KVM_PPC_PAGE_SIZES_REAL;
240
241 if (env->mmu_model & POWERPC_MMU_1TSEG) {
242 info->flags |= KVM_PPC_1T_SEGMENTS;
243 }
244
245 if (env->mmu_model == POWERPC_MMU_2_06) {
246 info->slb_size = 32;
247 } else {
248 info->slb_size = 64;
249 }
250
251 /* Standard 4k base page size segment */
252 info->sps[i].page_shift = 12;
253 info->sps[i].slb_enc = 0;
254 info->sps[i].enc[0].page_shift = 12;
255 info->sps[i].enc[0].pte_enc = 0;
256 i++;
257
258 /* 64K on MMU 2.06 */
259 if (env->mmu_model == POWERPC_MMU_2_06) {
260 info->sps[i].page_shift = 16;
261 info->sps[i].slb_enc = 0x110;
262 info->sps[i].enc[0].page_shift = 16;
263 info->sps[i].enc[0].pte_enc = 1;
264 i++;
265 }
266
267 /* Standard 16M large page size segment */
268 info->sps[i].page_shift = 24;
269 info->sps[i].slb_enc = SLB_VSID_L;
270 info->sps[i].enc[0].page_shift = 24;
271 info->sps[i].enc[0].pte_enc = 0;
272 }
273 }
274
275 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
276 {
277 CPUState *cs = CPU(cpu);
278 int ret;
279
280 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
281 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
282 if (ret == 0) {
283 return;
284 }
285 }
286
287 kvm_get_fallback_smmu_info(cpu, info);
288 }
289
290 static long getrampagesize(void)
291 {
292 struct statfs fs;
293 int ret;
294
295 if (!mem_path) {
296 /* guest RAM is backed by normal anonymous pages */
297 return getpagesize();
298 }
299
300 do {
301 ret = statfs(mem_path, &fs);
302 } while (ret != 0 && errno == EINTR);
303
304 if (ret != 0) {
305 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
306 strerror(errno));
307 exit(1);
308 }
309
310 #define HUGETLBFS_MAGIC 0x958458f6
311
312 if (fs.f_type != HUGETLBFS_MAGIC) {
313 /* Explicit mempath, but it's ordinary pages */
314 return getpagesize();
315 }
316
317 /* It's hugepage, return the huge page size */
318 return fs.f_bsize;
319 }
320
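/* Illustrative check for the helper below: with KVM_PPC_PAGE_SIZES_REAL set
 * and 16 MiB hugepage backing (rampgsize == 1 << 24), page shifts 12, 16 and
 * 24 all pass, while with 4 KiB anonymous backing only shift 12 does.
 * Without the REAL flag every size is accepted. */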
321 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
322 {
323 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
324 return true;
325 }
326
327 return (1ul << shift) <= rampgsize;
328 }
329
330 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
331 {
332 static struct kvm_ppc_smmu_info smmu_info;
333 static bool has_smmu_info;
334 CPUPPCState *env = &cpu->env;
335 long rampagesize;
336 int iq, ik, jq, jk;
337
338 /* We only handle page sizes for 64-bit server guests for now */
339 if (!(env->mmu_model & POWERPC_MMU_64)) {
340 return;
341 }
342
343 /* Collect MMU info from kernel if not already */
344 if (!has_smmu_info) {
345 kvm_get_smmu_info(cpu, &smmu_info);
346 has_smmu_info = true;
347 }
348
349 rampagesize = getrampagesize();
350
351 /* Convert to QEMU form */
352 memset(&env->sps, 0, sizeof(env->sps));
353
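/* ik/jk walk the kernel-supplied table, iq/jq the QEMU-side table; entries
 * whose page sizes the RAM backing can't satisfy are skipped, so the two
 * sets of indices can diverge. */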
354 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
355 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
356 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
357
358 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
359 ksps->page_shift)) {
360 continue;
361 }
362 qsps->page_shift = ksps->page_shift;
363 qsps->slb_enc = ksps->slb_enc;
364 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
365 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
366 ksps->enc[jk].page_shift)) {
367 continue;
368 }
369 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
370 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
371 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
372 break;
373 }
374 }
375 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
376 break;
377 }
378 }
379 env->slb_nr = smmu_info.slb_size;
380 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
381 env->mmu_model |= POWERPC_MMU_1TSEG;
382 } else {
383 env->mmu_model &= ~POWERPC_MMU_1TSEG;
384 }
385 }
386 #else /* defined (TARGET_PPC64) */
387
388 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
389 {
390 }
391
392 #endif /* !defined (TARGET_PPC64) */
393
394 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
395 {
396 return cpu->cpu_index;
397 }
398
399 int kvm_arch_init_vcpu(CPUState *cs)
400 {
401 PowerPCCPU *cpu = POWERPC_CPU(cs);
402 CPUPPCState *cenv = &cpu->env;
403 int ret;
404
405 /* Gather server mmu info from KVM and update the CPU state */
406 kvm_fixup_page_sizes(cpu);
407
408 /* Synchronize sregs with kvm */
409 ret = kvm_arch_sync_sregs(cpu);
410 if (ret) {
411 return ret;
412 }
413
414 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);
415
416 /* Some targets support access to KVM's guest TLB. */
417 switch (cenv->mmu_model) {
418 case POWERPC_MMU_BOOKE206:
419 ret = kvm_booke206_tlb_init(cpu);
420 break;
421 default:
422 break;
423 }
424
425 return ret;
426 }
427
428 void kvm_arch_reset_vcpu(CPUState *cpu)
429 {
430 }
431
432 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
433 {
434 CPUPPCState *env = &cpu->env;
435 CPUState *cs = CPU(cpu);
436 struct kvm_dirty_tlb dirty_tlb;
437 unsigned char *bitmap;
438 int ret;
439
440 if (!env->kvm_sw_tlb) {
441 return;
442 }
443
444 bitmap = g_malloc((env->nb_tlb + 7) / 8);
445 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
446
447 dirty_tlb.bitmap = (uintptr_t)bitmap;
448 dirty_tlb.num_dirty = env->nb_tlb;
449
450 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
451 if (ret) {
452 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
453 __func__, strerror(-ret));
454 }
455
456 g_free(bitmap);
457 }
458
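/* SPRs are transferred one at a time through the ONE_REG interface; the size
 * bits of the register id (id & KVM_REG_SIZE_MASK) decide whether a 32-bit
 * or 64-bit value is exchanged, which is why the helpers below switch on
 * that mask. */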
459 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
460 {
461 PowerPCCPU *cpu = POWERPC_CPU(cs);
462 CPUPPCState *env = &cpu->env;
463 union {
464 uint32_t u32;
465 uint64_t u64;
466 } val;
467 struct kvm_one_reg reg = {
468 .id = id,
469 .addr = (uintptr_t) &val,
470 };
471 int ret;
472
473 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
474 if (ret != 0) {
475 fprintf(stderr, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
476 spr, strerror(errno));
477 } else {
478 switch (id & KVM_REG_SIZE_MASK) {
479 case KVM_REG_SIZE_U32:
480 env->spr[spr] = val.u32;
481 break;
482
483 case KVM_REG_SIZE_U64:
484 env->spr[spr] = val.u64;
485 break;
486
487 default:
488 /* Don't handle this size yet */
489 abort();
490 }
491 }
492 }
493
494 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
495 {
496 PowerPCCPU *cpu = POWERPC_CPU(cs);
497 CPUPPCState *env = &cpu->env;
498 union {
499 uint32_t u32;
500 uint64_t u64;
501 } val;
502 struct kvm_one_reg reg = {
503 .id = id,
504 .addr = (uintptr_t) &val,
505 };
506 int ret;
507
508 switch (id & KVM_REG_SIZE_MASK) {
509 case KVM_REG_SIZE_U32:
510 val.u32 = env->spr[spr];
511 break;
512
513 case KVM_REG_SIZE_U64:
514 val.u64 = env->spr[spr];
515 break;
516
517 default:
518 /* Don't handle this size yet */
519 abort();
520 }
521
522 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
523 if (ret != 0) {
524 fprintf(stderr, "Warning: Unable to set SPR %d to KVM: %s\n",
525 spr, strerror(errno));
526 }
527 }
528
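/* FP/VSX state is transferred per register as a pair of 64-bit doublewords:
 * vsr[0] carries the FPR half and vsr[1] the VSX extension. On CPUs without
 * VSX (PPC2_VSX clear) only the FPR half is sent, using KVM_REG_PPC_FPR()
 * ids instead of KVM_REG_PPC_VSR(). */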
529 static int kvm_put_fp(CPUState *cs)
530 {
531 PowerPCCPU *cpu = POWERPC_CPU(cs);
532 CPUPPCState *env = &cpu->env;
533 struct kvm_one_reg reg;
534 int i;
535 int ret;
536
537 if (env->insns_flags & PPC_FLOAT) {
538 uint64_t fpscr = env->fpscr;
539 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
540
541 reg.id = KVM_REG_PPC_FPSCR;
542 reg.addr = (uintptr_t)&fpscr;
543 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
544 if (ret < 0) {
545 dprintf("Unable to set FPSCR to KVM: %s\n", strerror(errno));
546 return ret;
547 }
548
549 for (i = 0; i < 32; i++) {
550 uint64_t vsr[2];
551
552 vsr[0] = float64_val(env->fpr[i]);
553 vsr[1] = env->vsr[i];
554 reg.addr = (uintptr_t) &vsr;
555 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
556
557 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
558 if (ret < 0) {
559 dprintf("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
560 i, strerror(errno));
561 return ret;
562 }
563 }
564 }
565
566 if (env->insns_flags & PPC_ALTIVEC) {
567 reg.id = KVM_REG_PPC_VSCR;
568 reg.addr = (uintptr_t)&env->vscr;
569 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
570 if (ret < 0) {
571 dprintf("Unable to set VSCR to KVM: %s\n", strerror(errno));
572 return ret;
573 }
574
575 for (i = 0; i < 32; i++) {
576 reg.id = KVM_REG_PPC_VR(i);
577 reg.addr = (uintptr_t)&env->avr[i];
578 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
579 if (ret < 0) {
580 dprintf("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
581 return ret;
582 }
583 }
584 }
585
586 return 0;
587 }
588
589 static int kvm_get_fp(CPUState *cs)
590 {
591 PowerPCCPU *cpu = POWERPC_CPU(cs);
592 CPUPPCState *env = &cpu->env;
593 struct kvm_one_reg reg;
594 int i;
595 int ret;
596
597 if (env->insns_flags & PPC_FLOAT) {
598 uint64_t fpscr;
599 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
600
601 reg.id = KVM_REG_PPC_FPSCR;
602 reg.addr = (uintptr_t)&fpscr;
603 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
604 if (ret < 0) {
605 dprintf("Unable to get FPSCR from KVM: %s\n", strerror(errno));
606 return ret;
607 } else {
608 env->fpscr = fpscr;
609 }
610
611 for (i = 0; i < 32; i++) {
612 uint64_t vsr[2];
613
614 reg.addr = (uintptr_t) &vsr;
615 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
616
617 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
618 if (ret < 0) {
619 dprintf("Unable to get %s%d from KVM: %s\n",
620 vsx ? "VSR" : "FPR", i, strerror(errno));
621 return ret;
622 } else {
623 env->fpr[i] = vsr[0];
624 if (vsx) {
625 env->vsr[i] = vsr[1];
626 }
627 }
628 }
629 }
630
631 if (env->insns_flags & PPC_ALTIVEC) {
632 reg.id = KVM_REG_PPC_VSCR;
633 reg.addr = (uintptr_t)&env->vscr;
634 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
635 if (ret < 0) {
636 dprintf("Unable to get VSCR from KVM: %s\n", strerror(errno));
637 return ret;
638 }
639
640 for (i = 0; i < 32; i++) {
641 reg.id = KVM_REG_PPC_VR(i);
642 reg.addr = (uintptr_t)&env->avr[i];
643 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
644 if (ret < 0) {
645 dprintf("Unable to get VR%d from KVM: %s\n",
646 i, strerror(errno));
647 return ret;
648 }
649 }
650 }
651
652 return 0;
653 }
654
655 int kvm_arch_put_registers(CPUState *cs, int level)
656 {
657 PowerPCCPU *cpu = POWERPC_CPU(cs);
658 CPUPPCState *env = &cpu->env;
659 struct kvm_regs regs;
660 int ret;
661 int i;
662
663 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
664 if (ret < 0) {
665 return ret;
666 }
667
668 regs.ctr = env->ctr;
669 regs.lr = env->lr;
670 regs.xer = cpu_read_xer(env);
671 regs.msr = env->msr;
672 regs.pc = env->nip;
673
674 regs.srr0 = env->spr[SPR_SRR0];
675 regs.srr1 = env->spr[SPR_SRR1];
676
677 regs.sprg0 = env->spr[SPR_SPRG0];
678 regs.sprg1 = env->spr[SPR_SPRG1];
679 regs.sprg2 = env->spr[SPR_SPRG2];
680 regs.sprg3 = env->spr[SPR_SPRG3];
681 regs.sprg4 = env->spr[SPR_SPRG4];
682 regs.sprg5 = env->spr[SPR_SPRG5];
683 regs.sprg6 = env->spr[SPR_SPRG6];
684 regs.sprg7 = env->spr[SPR_SPRG7];
685
686 regs.pid = env->spr[SPR_BOOKE_PID];
687
688 for (i = 0; i < 32; i++)
689 regs.gpr[i] = env->gpr[i];
690
691 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
692 if (ret < 0)
693 return ret;
694
695 kvm_put_fp(cs);
696
697 if (env->tlb_dirty) {
698 kvm_sw_tlb_put(cpu);
699 env->tlb_dirty = false;
700 }
701
702 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
703 struct kvm_sregs sregs;
704
705 sregs.pvr = env->spr[SPR_PVR];
706
707 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
708
709 /* Sync SLB */
710 #ifdef TARGET_PPC64
711 for (i = 0; i < 64; i++) {
712 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
713 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
714 }
715 #endif
716
717 /* Sync SRs */
718 for (i = 0; i < 16; i++) {
719 sregs.u.s.ppc32.sr[i] = env->sr[i];
720 }
721
722 /* Sync BATs */
723 for (i = 0; i < 8; i++) {
724 /* Beware. We have to swap upper and lower bits here */
725 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
726 | env->DBAT[1][i];
727 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
728 | env->IBAT[1][i];
729 }
730
731 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
732 if (ret) {
733 return ret;
734 }
735 }
736
737 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
738 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
739 }
740
741 if (cap_one_reg) {
742 int i;
743
744 /* We deliberately ignore errors here: for kernels which have
745 * the ONE_REG calls but don't support the specific
746 * registers, there's a reasonable chance things will still
747 * work, at least until we try to migrate. */
748 for (i = 0; i < 1024; i++) {
749 uint64_t id = env->spr_cb[i].one_reg_id;
750
751 if (id != 0) {
752 kvm_put_one_spr(cs, id, i);
753 }
754 }
755 }
756
757 return ret;
758 }
759
760 int kvm_arch_get_registers(CPUState *cs)
761 {
762 PowerPCCPU *cpu = POWERPC_CPU(cs);
763 CPUPPCState *env = &cpu->env;
764 struct kvm_regs regs;
765 struct kvm_sregs sregs;
766 uint32_t cr;
767 int i, ret;
768
769 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
770 if (ret < 0)
771 return ret;
772
773 cr = regs.cr;
774 for (i = 7; i >= 0; i--) {
775 env->crf[i] = cr & 15;
776 cr >>= 4;
777 }
778
779 env->ctr = regs.ctr;
780 env->lr = regs.lr;
781 cpu_write_xer(env, regs.xer);
782 env->msr = regs.msr;
783 env->nip = regs.pc;
784
785 env->spr[SPR_SRR0] = regs.srr0;
786 env->spr[SPR_SRR1] = regs.srr1;
787
788 env->spr[SPR_SPRG0] = regs.sprg0;
789 env->spr[SPR_SPRG1] = regs.sprg1;
790 env->spr[SPR_SPRG2] = regs.sprg2;
791 env->spr[SPR_SPRG3] = regs.sprg3;
792 env->spr[SPR_SPRG4] = regs.sprg4;
793 env->spr[SPR_SPRG5] = regs.sprg5;
794 env->spr[SPR_SPRG6] = regs.sprg6;
795 env->spr[SPR_SPRG7] = regs.sprg7;
796
797 env->spr[SPR_BOOKE_PID] = regs.pid;
798
799 for (i = 0; i < 32; i++)
800 env->gpr[i] = regs.gpr[i];
801
802 kvm_get_fp(cs);
803
804 if (cap_booke_sregs) {
805 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
806 if (ret < 0) {
807 return ret;
808 }
809
810 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
811 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
812 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
813 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
814 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
815 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
816 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
817 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
818 env->spr[SPR_DECR] = sregs.u.e.dec;
819 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
820 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
821 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
822 }
823
824 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
825 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
826 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
827 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
828 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
829 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
830 }
831
832 if (sregs.u.e.features & KVM_SREGS_E_64) {
833 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
834 }
835
836 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
837 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
838 }
839
840 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
841 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
842 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
843 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
844 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
845 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
846 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
847 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
848 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
849 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
850 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
851 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
852 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
853 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
854 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
855 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
856 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
857
858 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
859 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
860 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
861 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
862 }
863
864 if (sregs.u.e.features & KVM_SREGS_E_PM) {
865 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
866 }
867
868 if (sregs.u.e.features & KVM_SREGS_E_PC) {
869 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
870 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
871 }
872 }
873
874 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
875 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
876 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
877 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
878 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
879 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
880 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
881 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
882 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
883 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
884 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
885 }
886
887 if (sregs.u.e.features & KVM_SREGS_EXP) {
888 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
889 }
890
891 if (sregs.u.e.features & KVM_SREGS_E_PD) {
892 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
893 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
894 }
895
896 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
897 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
898 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
899 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
900
901 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
902 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
903 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
904 }
905 }
906 }
907
908 if (cap_segstate) {
909 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
910 if (ret < 0) {
911 return ret;
912 }
913
914 ppc_store_sdr1(env, sregs.u.s.sdr1);
915
916 /* Sync SLB */
917 #ifdef TARGET_PPC64
918 for (i = 0; i < 64; i++) {
919 ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
920 sregs.u.s.ppc64.slb[i].slbv);
921 }
922 #endif
923
924 /* Sync SRs */
925 for (i = 0; i < 16; i++) {
926 env->sr[i] = sregs.u.s.ppc32.sr[i];
927 }
928
929 /* Sync BATs */
930 for (i = 0; i < 8; i++) {
931 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
932 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
933 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
934 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
935 }
936 }
937
938 if (cap_hior) {
939 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
940 }
941
942 if (cap_one_reg) {
943 int i;
944
945 /* We deliberately ignore errors here: for kernels which have
946 * the ONE_REG calls but don't support the specific
947 * registers, there's a reasonable chance things will still
948 * work, at least until we try to migrate. */
949 for (i = 0; i < 1024; i++) {
950 uint64_t id = env->spr_cb[i].one_reg_id;
951
952 if (id != 0) {
953 kvm_get_one_spr(cs, id, i);
954 }
955 }
956 }
957
958 return 0;
959 }
960
961 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
962 {
963 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
964
965 if (irq != PPC_INTERRUPT_EXT) {
966 return 0;
967 }
968
969 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
970 return 0;
971 }
972
973 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
974
975 return 0;
976 }
977
978 #if defined(TARGET_PPCEMB)
979 #define PPC_INPUT_INT PPC40x_INPUT_INT
980 #elif defined(TARGET_PPC64)
981 #define PPC_INPUT_INT PPC970_INPUT_INT
982 #else
983 #define PPC_INPUT_INT PPC6xx_INPUT_INT
984 #endif
985
986 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
987 {
988 PowerPCCPU *cpu = POWERPC_CPU(cs);
989 CPUPPCState *env = &cpu->env;
990 int r;
991 unsigned irq;
992
993 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
994 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
995 if (!cap_interrupt_level &&
996 run->ready_for_interrupt_injection &&
997 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
998 (env->irq_input_state & (1<<PPC_INPUT_INT)))
999 {
1000 /* For now KVM disregards the 'irq' argument. However, in the
1001 * future KVM could cache it in-kernel to avoid a heavyweight exit
1002 * when reading the UIC.
1003 */
1004 irq = KVM_INTERRUPT_SET;
1005
1006 dprintf("injected interrupt %d\n", irq);
1007 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1008 if (r < 0) {
1009 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1010 }
1011
1012 /* Always wake up soon in case the interrupt was level based */
1013 qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
1014 (get_ticks_per_sec() / 50));
1015 }
1016
1017 /* We don't know if there are more interrupts pending after this. However,
1018 * the guest will return to userspace in the course of handling this one
1019 * anyways, so we will get a chance to deliver the rest. */
1020 }
1021
1022 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1023 {
1024 }
1025
1026 int kvm_arch_process_async_events(CPUState *cs)
1027 {
1028 return cs->halted;
1029 }
1030
1031 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1032 {
1033 CPUState *cs = CPU(cpu);
1034 CPUPPCState *env = &cpu->env;
1035
1036 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1037 cs->halted = 1;
1038 env->exception_index = EXCP_HLT;
1039 }
1040
1041 return 0;
1042 }
1043
1044 /* map dcr access to existing qemu dcr emulation */
1045 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1046 {
1047 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1048 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1049
1050 return 0;
1051 }
1052
1053 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1054 {
1055 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1056 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1057
1058 return 0;
1059 }
1060
1061 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1062 {
1063 PowerPCCPU *cpu = POWERPC_CPU(cs);
1064 CPUPPCState *env = &cpu->env;
1065 int ret;
1066
1067 switch (run->exit_reason) {
1068 case KVM_EXIT_DCR:
1069 if (run->dcr.is_write) {
1070 dprintf("handle dcr write\n");
1071 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1072 } else {
1073 dprintf("handle dcr read\n");
1074 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1075 }
1076 break;
1077 case KVM_EXIT_HLT:
1078 dprintf("handle halt\n");
1079 ret = kvmppc_handle_halt(cpu);
1080 break;
1081 #if defined(TARGET_PPC64)
1082 case KVM_EXIT_PAPR_HCALL:
1083 dprintf("handle PAPR hypercall\n");
1084 run->papr_hcall.ret = spapr_hypercall(cpu,
1085 run->papr_hcall.nr,
1086 run->papr_hcall.args);
1087 ret = 0;
1088 break;
1089 #endif
1090 case KVM_EXIT_EPR:
1091 dprintf("handle epr\n");
1092 run->epr.epr = ldl_phys(env->mpic_iack);
1093 ret = 0;
1094 break;
1095 default:
1096 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1097 ret = -1;
1098 break;
1099 }
1100
1101 return ret;
1102 }
1103
1104 static int read_cpuinfo(const char *field, char *value, int len)
1105 {
1106 FILE *f;
1107 int ret = -1;
1108 int field_len = strlen(field);
1109 char line[512];
1110
1111 f = fopen("/proc/cpuinfo", "r");
1112 if (!f) {
1113 return -1;
1114 }
1115
1116 do {
1117 if (!fgets(line, sizeof(line), f)) {
1118 break;
1119 }
1120 if (!strncmp(line, field, field_len)) {
1121 pstrcpy(value, len, line);
1122 ret = 0;
1123 break;
1124 }
1125 } while (*line);
1126
1127 fclose(f);
1128
1129 return ret;
1130 }
1131
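/* Parse the host timebase frequency (in Hz) out of /proc/cpuinfo. On ppc
 * hosts the relevant line typically looks like "timebase : 512000000";
 * if it can't be read we fall back to get_ticks_per_sec(). */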
1132 uint32_t kvmppc_get_tbfreq(void)
1133 {
1134 char line[512];
1135 char *ns;
1136 uint32_t retval = get_ticks_per_sec();
1137
1138 if (read_cpuinfo("timebase", line, sizeof(line))) {
1139 return retval;
1140 }
1141
1142 if (!(ns = strchr(line, ':'))) {
1143 return retval;
1144 }
1145
1146 ns++;
1147
1148 retval = atoi(ns);
1149 return retval;
1150 }
1151
1152 /* Try to find a device tree node for a CPU with clock-frequency property */
1153 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1154 {
1155 struct dirent *dirp;
1156 DIR *dp;
1157
1158 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1159 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1160 return -1;
1161 }
1162
1163 buf[0] = '\0';
1164 while ((dirp = readdir(dp)) != NULL) {
1165 FILE *f;
1166 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1167 dirp->d_name);
1168 f = fopen(buf, "r");
1169 if (f) {
1170 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1171 fclose(f);
1172 break;
1173 }
1174 buf[0] = '\0';
1175 }
1176 closedir(dp);
1177 if (buf[0] == '\0') {
1178 printf("Unknown host!\n");
1179 return -1;
1180 }
1181
1182 return 0;
1183 }
1184
1185 /* Read a CPU node property from the host device tree that's a single
1186 * integer (32-bit or 64-bit). Returns (uint64_t)-1 if it can't find
1187 * or open the property, and 0 if it doesn't understand the
1188 * format. */
1189 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1190 {
1191 char buf[PATH_MAX];
1192 union {
1193 uint32_t v32;
1194 uint64_t v64;
1195 } u;
1196 FILE *f;
1197 int len;
1198
1199 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1200 return -1;
1201 }
1202
1203 strncat(buf, "/", sizeof(buf) - strlen(buf) - 1);
1204 strncat(buf, propname, sizeof(buf) - strlen(buf) - 1);
1205
1206 f = fopen(buf, "rb");
1207 if (!f) {
1208 return -1;
1209 }
1210
1211 len = fread(&u, 1, sizeof(u), f);
1212 fclose(f);
1213 switch (len) {
1214 case 4:
1215 /* property is a 32-bit quantity */
1216 return be32_to_cpu(u.v32);
1217 case 8:
1218 return be64_to_cpu(u.v64);
1219 }
1220
1221 return 0;
1222 }
1223
1224 uint64_t kvmppc_get_clockfreq(void)
1225 {
1226 return kvmppc_read_int_cpu_dt("clock-frequency");
1227 }
1228
1229 uint32_t kvmppc_get_vmx(void)
1230 {
1231 return kvmppc_read_int_cpu_dt("ibm,vmx");
1232 }
1233
1234 uint32_t kvmppc_get_dfp(void)
1235 {
1236 return kvmppc_read_int_cpu_dt("ibm,dfp");
1237 }
1238
1239 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1240 {
1241 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1242 CPUState *cs = CPU(cpu);
1243
1244 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1245 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1246 return 0;
1247 }
1248
1249 return 1;
1250 }
1251
1252 int kvmppc_get_hasidle(CPUPPCState *env)
1253 {
1254 struct kvm_ppc_pvinfo pvinfo;
1255
1256 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1257 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1258 return 1;
1259 }
1260
1261 return 0;
1262 }
1263
1264 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1265 {
1266 uint32_t *hc = (uint32_t*)buf;
1267 struct kvm_ppc_pvinfo pvinfo;
1268
1269 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1270 memcpy(buf, pvinfo.hcall, buf_len);
1271 return 0;
1272 }
1273
1274 /*
1275 * Fallback to always fail hypercalls:
1276 *
1277 * li r3, -1
1278 * nop
1279 * nop
1280 * nop
1281 */
1282
1283 hc[0] = 0x3860ffff;
1284 hc[1] = 0x60000000;
1285 hc[2] = 0x60000000;
1286 hc[3] = 0x60000000;
1287
1288 return 0;
1289 }
1290
1291 void kvmppc_set_papr(PowerPCCPU *cpu)
1292 {
1293 CPUPPCState *env = &cpu->env;
1294 CPUState *cs = CPU(cpu);
1295 struct kvm_enable_cap cap = {};
1296 int ret;
1297
1298 cap.cap = KVM_CAP_PPC_PAPR;
1299 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1300
1301 if (ret) {
1302 cpu_abort(env, "This KVM version does not support PAPR\n");
1303 }
1304 }
1305
1306 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1307 {
1308 CPUPPCState *env = &cpu->env;
1309 CPUState *cs = CPU(cpu);
1310 struct kvm_enable_cap cap = {};
1311 int ret;
1312
1313 cap.cap = KVM_CAP_PPC_EPR;
1314 cap.args[0] = mpic_proxy;
1315 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1316
1317 if (ret && mpic_proxy) {
1318 cpu_abort(env, "This KVM version does not support EPR\n");
1319 }
1320 }
1321
1322 int kvmppc_smt_threads(void)
1323 {
1324 return cap_ppc_smt ? cap_ppc_smt : 1;
1325 }
1326
1327 #ifdef TARGET_PPC64
1328 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1329 {
1330 void *rma;
1331 off_t size;
1332 int fd;
1333 struct kvm_allocate_rma ret;
1334 MemoryRegion *rma_region;
1335
1336 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1337 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1338 * not necessary on this hardware
1339 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1340 *
1341 * FIXME: We should allow the user to force contiguous RMA
1342 * allocation in the cap_ppc_rma==1 case.
1343 */
1344 if (cap_ppc_rma < 2) {
1345 return 0;
1346 }
1347
1348 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1349 if (fd < 0) {
1350 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1351 strerror(errno));
1352 return -1;
1353 }
1354
1355 size = MIN(ret.rma_size, 256ul << 20);
1356
1357 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1358 if (rma == MAP_FAILED) {
1359 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1360 return -1;
1361 }
1362
1363 rma_region = g_new(MemoryRegion, 1);
1364 memory_region_init_ram_ptr(rma_region, name, size, rma);
1365 vmstate_register_ram_global(rma_region);
1366 memory_region_add_subregion(sysmem, 0, rma_region);
1367
1368 return size;
1369 }
1370
1371 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1372 {
1373 if (cap_ppc_rma >= 2) {
1374 return current_size;
1375 }
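/* Otherwise the usable RMA is capped at getrampagesize() << (hash_shift - 7);
 * e.g. with 4 KiB backing pages and hash_shift == 24 (a 16 MiB hash table)
 * that works out to 4 KiB << 17 = 512 MiB (illustrative numbers only). */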
1376 return MIN(current_size,
1377 getrampagesize() << (hash_shift - 7));
1378 }
1379 #endif
1380
1381 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1382 {
1383 struct kvm_create_spapr_tce args = {
1384 .liobn = liobn,
1385 .window_size = window_size,
1386 };
1387 long len;
1388 int fd;
1389 void *table;
1390
1391 /* Must set fd to -1 so we don't try to munmap when called for
1392 * destroying the table, which the upper layers -will- do
1393 */
1394 *pfd = -1;
1395 if (!cap_spapr_tce) {
1396 return NULL;
1397 }
1398
1399 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1400 if (fd < 0) {
1401 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1402 liobn);
1403 return NULL;
1404 }
1405
1406 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1407 /* FIXME: round this up to page size */
1408
1409 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1410 if (table == MAP_FAILED) {
1411 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1412 liobn);
1413 close(fd);
1414 return NULL;
1415 }
1416
1417 *pfd = fd;
1418 return table;
1419 }
1420
1421 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1422 {
1423 long len;
1424
1425 if (fd < 0) {
1426 return -1;
1427 }
1428
1429 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
1430 if ((munmap(table, len) < 0) ||
1431 (close(fd) < 0)) {
1432 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1433 strerror(errno));
1434 /* Leak the table */
1435 }
1436
1437 return 0;
1438 }
1439
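/* Returns 0 when QEMU itself has to allocate the hash table (full emulation
 * or PR KVM), a negative errno on failure, and otherwise the log2 of the
 * hash table size the kernel set up (e.g. 24 for the legacy 16 MB HV htab). */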
1440 int kvmppc_reset_htab(int shift_hint)
1441 {
1442 uint32_t shift = shift_hint;
1443
1444 if (!kvm_enabled()) {
1445 /* Full emulation, tell caller to allocate htab itself */
1446 return 0;
1447 }
1448 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1449 int ret;
1450 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1451 if (ret == -ENOTTY) {
1452 /* At least some versions of PR KVM advertise the
1453 * capability, but don't implement the ioctl(). Oops.
1454 * Return 0 so that we allocate the htab in qemu, as is
1455 * correct for PR. */
1456 return 0;
1457 } else if (ret < 0) {
1458 return ret;
1459 }
1460 return shift;
1461 }
1462
1463 /* We have a kernel that predates the htab reset calls. For PR
1464 * KVM, we need to allocate the htab ourselves; for an HV KVM of
1465 * this era, a 16MB fixed size hash table has already been
1466 * allocated. Kernels of this era have the GET_PVINFO capability
1467 * only on PR, so we use this hack to determine the right
1468 * answer. */
1469 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1470 /* PR - tell caller to allocate htab */
1471 return 0;
1472 } else {
1473 /* HV - assume 16MB kernel allocated htab */
1474 return 24;
1475 }
1476 }
1477
1478 static inline uint32_t mfpvr(void)
1479 {
1480 uint32_t pvr;
1481
1482 asm ("mfpvr %0"
1483 : "=r"(pvr));
1484 return pvr;
1485 }
1486
1487 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
1488 {
1489 if (on) {
1490 *word |= flags;
1491 } else {
1492 *word &= ~flags;
1493 }
1494 }
1495
1496 static void kvmppc_host_cpu_initfn(Object *obj)
1497 {
1498 assert(kvm_enabled());
1499 }
1500
1501 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1502 {
1503 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1504 uint32_t vmx = kvmppc_get_vmx();
1505 uint32_t dfp = kvmppc_get_dfp();
1506
1507 /* Now fix up the class with information we can query from the host */
1508
1509 if (vmx != -1) {
1510 /* Only override when we know what the host supports */
1511 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1512 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1513 }
1514 if (dfp != -1) {
1515 /* Only override when we know what the host supports */
1516 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1517 }
1518 }
1519
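/* Illustrative mapping: with smp_threads == 2 on a host where
 * kvmppc_smt_threads() == 4, guest cpu indices 0,1,2,3 become vcpu ids
 * 0,1,4,5, i.e. the threads of one guest core stay within one host core's
 * SMT group (assumed example values). */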
1520 int kvmppc_fixup_cpu(PowerPCCPU *cpu)
1521 {
1522 CPUState *cs = CPU(cpu);
1523 int smt;
1524
1525 /* Adjust cpu index for SMT */
1526 smt = kvmppc_smt_threads();
1527 cs->cpu_index = (cs->cpu_index / smp_threads) * smt
1528 + (cs->cpu_index % smp_threads);
1529
1530 return 0;
1531 }
1532
1533 static int kvm_ppc_register_host_cpu_type(void)
1534 {
1535 TypeInfo type_info = {
1536 .name = TYPE_HOST_POWERPC_CPU,
1537 .instance_init = kvmppc_host_cpu_initfn,
1538 .class_init = kvmppc_host_cpu_class_init,
1539 };
1540 uint32_t host_pvr = mfpvr();
1541 PowerPCCPUClass *pvr_pcc;
1542
1543 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1544 if (pvr_pcc == NULL) {
1545 return -1;
1546 }
1547 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1548 type_register(&type_info);
1549 return 0;
1550 }
1551
1552
1553 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1554 {
1555 return true;
1556 }
1557
1558 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1559 {
1560 return 1;
1561 }
1562
1563 int kvm_arch_on_sigbus(int code, void *addr)
1564 {
1565 return 1;
1566 }