1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
22
23 #include <linux/kvm.h>
24
25 #include "qemu-common.h"
26 #include "qemu-timer.h"
27 #include "sysemu.h"
28 #include "kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "cpus.h"
32 #include "device_tree.h"
33 #include "hw/sysbus.h"
34 #include "hw/spapr.h"
35
38 #include "hw/spapr_vio.h"
39
40 //#define DEBUG_KVM
41
42 #ifdef DEBUG_KVM
43 #define dprintf(fmt, ...) \
44 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
45 #else
46 #define dprintf(fmt, ...) \
47 do { } while (0)
48 #endif
49
50 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
51
52 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
53 KVM_CAP_LAST_INFO
54 };
55
56 static int cap_interrupt_unset = false;
57 static int cap_interrupt_level = false;
58 static int cap_segstate;
59 static int cap_booke_sregs;
60 static int cap_ppc_smt;
61 static int cap_ppc_rma;
62 static int cap_spapr_tce;
63 static int cap_hior;
64
65 /* XXX We have a race condition where we actually have a level triggered
66 * interrupt, but the infrastructure can't expose that yet, so the guest
67 * takes but ignores it, goes to sleep and never gets notified that there's
68 * still an interrupt pending.
69 *
70 * As a quick workaround, let's just wake up again 20 ms after we injected
71 * an interrupt. That way we can ensure that we're always reinjecting
72 * interrupts in case the guest swallowed them.
73 */
74 static QEMUTimer *idle_timer;
75
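/* Timer callback: kick the vcpu out of KVM so it re-checks pending
 * interrupts (see the level-interrupt workaround above). */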
76 static void kvm_kick_cpu(void *opaque)
77 {
78 PowerPCCPU *cpu = opaque;
79
80 qemu_cpu_kick(CPU(cpu));
81 }
82
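/* Probe the KVM capabilities this file relies on. Missing level-irq
 * support is survivable but may make the guest stall at times, hence
 * the warning. */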
83 int kvm_arch_init(KVMState *s)
84 {
85 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
86 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
87 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
88 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
89 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
90 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
91 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
92 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
93
94 if (!cap_interrupt_level) {
95 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
96 "VM to stall at times!\n");
97 }
98
99 return 0;
100 }
101
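/* Tell KVM which PVR to present to the guest (Book3S only; BookE keeps
 * the host's native PVR for now). */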
102 static int kvm_arch_sync_sregs(CPUPPCState *cenv)
103 {
104 struct kvm_sregs sregs;
105 int ret;
106
107 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
108 /* What we're really trying to say is "if we're on BookE, we use
109 the native PVR for now". This is the only sane way to check
110 it, though it may mislead users into thinking they can run
111 BookE guests on Book3S. Let's hope nobody tries :) */
112 return 0;
113 } else {
114 if (!cap_segstate) {
115 fprintf(stderr, "kvm error: missing PVR setting capability\n");
116 return -ENOSYS;
117 }
118 }
119
120 ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
121 if (ret) {
122 return ret;
123 }
124
125 sregs.pvr = cenv->spr[SPR_PVR];
126 return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
127 }
128
129 /* Set up a shared TLB array with KVM */
130 static int kvm_booke206_tlb_init(CPUPPCState *env)
131 {
132 struct kvm_book3e_206_tlb_params params = {};
133 struct kvm_config_tlb cfg = {};
134 struct kvm_enable_cap encap = {};
135 unsigned int entries = 0;
136 int ret, i;
137
138 if (!kvm_enabled() ||
139 !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
140 return 0;
141 }
142
143 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
144
145 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
146 params.tlb_sizes[i] = booke206_tlb_size(env, i);
147 params.tlb_ways[i] = booke206_tlb_ways(env, i);
148 entries += params.tlb_sizes[i];
149 }
150
151 assert(entries == env->nb_tlb);
152 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
153
154 env->tlb_dirty = true;
155
156 cfg.array = (uintptr_t)env->tlb.tlbm;
157 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
158 cfg.params = (uintptr_t)&params;
159 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
160
161 encap.cap = KVM_CAP_SW_TLB;
162 encap.args[0] = (uintptr_t)&cfg;
163
164 ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &encap);
165 if (ret < 0) {
166 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
167 __func__, strerror(-ret));
168 return ret;
169 }
170
171 env->kvm_sw_tlb = true;
172 return 0;
173 }
174
175
176 #if defined(TARGET_PPC64)
177 static void kvm_get_fallback_smmu_info(CPUPPCState *env,
178 struct kvm_ppc_smmu_info *info)
179 {
180 memset(info, 0, sizeof(*info));
181
182 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
183 * need to "guess" what the supported page sizes are.
184 *
185 * For that to work we make a few assumptions:
186 *
187 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
188 * KVM which only supports 4K and 16M pages, but supports them
189 * regardless of the backing store characteristics. We also don't
190 * support 1T segments.
191 *
192 * This is safe because if HV KVM ever supports that capability or PR
193 * KVM grows support for more page/segment sizes, those versions
194 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
195 * will not hit this fallback.
196 *
197 * - Else we are running HV KVM. This means we only support page
198 * sizes that fit in the backing store. Additionally we only
199 * advertise 64K pages if the processor is ARCH 2.06 and we assume
200 * P7 encodings for the SLB and hash table. Here too, we assume
201 * support for any newer processor will mean a kernel that
202 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
203 * this fallback.
204 */
205 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
206 /* No flags */
207 info->flags = 0;
208 info->slb_size = 64;
209
210 /* Standard 4k base page size segment */
211 info->sps[0].page_shift = 12;
212 info->sps[0].slb_enc = 0;
213 info->sps[0].enc[0].page_shift = 12;
214 info->sps[0].enc[0].pte_enc = 0;
215
216 /* Standard 16M large page size segment */
217 info->sps[1].page_shift = 24;
218 info->sps[1].slb_enc = SLB_VSID_L;
219 info->sps[1].enc[0].page_shift = 24;
220 info->sps[1].enc[0].pte_enc = 0;
221 } else {
222 int i = 0;
223
224 /* HV KVM has backing store size restrictions */
225 info->flags = KVM_PPC_PAGE_SIZES_REAL;
226
227 if (env->mmu_model & POWERPC_MMU_1TSEG) {
228 info->flags |= KVM_PPC_1T_SEGMENTS;
229 }
230
231 if (env->mmu_model == POWERPC_MMU_2_06) {
232 info->slb_size = 32;
233 } else {
234 info->slb_size = 64;
235 }
236
237 /* Standard 4k base page size segment */
238 info->sps[i].page_shift = 12;
239 info->sps[i].slb_enc = 0;
240 info->sps[i].enc[0].page_shift = 12;
241 info->sps[i].enc[0].pte_enc = 0;
242 i++;
243
244 /* 64K on MMU 2.06 */
245 if (env->mmu_model == POWERPC_MMU_2_06) {
246 info->sps[i].page_shift = 16;
247 info->sps[i].slb_enc = 0x110;
248 info->sps[i].enc[0].page_shift = 16;
249 info->sps[i].enc[0].pte_enc = 1;
250 i++;
251 }
252
253 /* Standard 16M large page size segment */
254 info->sps[i].page_shift = 24;
255 info->sps[i].slb_enc = SLB_VSID_L;
256 info->sps[i].enc[0].page_shift = 24;
257 info->sps[i].enc[0].pte_enc = 0;
258 }
259 }
260
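/* Ask the kernel which segment/page sizes the MMU supports, falling back
 * to conservative guesses on kernels without KVM_PPC_GET_SMMU_INFO. */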
261 static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
262 {
263 int ret;
264
265 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
266 ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
267 if (ret == 0) {
268 return;
269 }
270 }
271
272 kvm_get_fallback_smmu_info(env, info);
273 }
274
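/* Page size backing guest RAM: the hugepage size if -mem-path points at
 * hugetlbfs, otherwise the normal host page size. */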
275 static long getrampagesize(void)
276 {
277 struct statfs fs;
278 int ret;
279
280 if (!mem_path) {
281 /* guest RAM is backed by normal anonymous pages */
282 return getpagesize();
283 }
284
285 do {
286 ret = statfs(mem_path, &fs);
287 } while (ret != 0 && errno == EINTR);
288
289 if (ret != 0) {
290 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
291 strerror(errno));
292 exit(1);
293 }
294
295 #define HUGETLBFS_MAGIC 0x958458f6
296
297 if (fs.f_type != HUGETLBFS_MAGIC) {
298 /* Explicit mempath, but it's ordinary pages */
299 return getpagesize();
300 }
301
302 /* It's hugetlbfs; return the huge page size */
303 return fs.f_bsize;
304 }
305
306 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
307 {
308 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
309 return true;
310 }
311
312 return (1ul << shift) <= rampgsize;
313 }
314
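/* Restrict env->sps to the page sizes that both the kernel and the RAM
 * backing store can provide (64-bit server MMUs only). */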
315 static void kvm_fixup_page_sizes(CPUPPCState *env)
316 {
317 static struct kvm_ppc_smmu_info smmu_info;
318 static bool has_smmu_info;
319 long rampagesize;
320 int iq, ik, jq, jk;
321
322 /* We only handle page sizes for 64-bit server guests for now */
323 if (!(env->mmu_model & POWERPC_MMU_64)) {
324 return;
325 }
326
327 /* Collect MMU info from the kernel if we haven't already */
328 if (!has_smmu_info) {
329 kvm_get_smmu_info(env, &smmu_info);
330 has_smmu_info = true;
331 }
332
333 rampagesize = getrampagesize();
334
335 /* Convert to QEMU form */
336 memset(&env->sps, 0, sizeof(env->sps));
337
338 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
339 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
340 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
341
342 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
343 ksps->page_shift)) {
344 continue;
345 }
346 qsps->page_shift = ksps->page_shift;
347 qsps->slb_enc = ksps->slb_enc;
348 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
349 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
350 ksps->enc[jk].page_shift)) {
351 continue;
352 }
353 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
354 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
355 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
356 break;
357 }
358 }
359 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
360 break;
361 }
362 }
363 env->slb_nr = smmu_info.slb_size;
364 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
365 env->mmu_model |= POWERPC_MMU_1TSEG;
366 } else {
367 env->mmu_model &= ~POWERPC_MMU_1TSEG;
368 }
369 }
370 #else /* defined (TARGET_PPC64) */
371
372 static inline void kvm_fixup_page_sizes(CPUPPCState *env)
373 {
374 }
375
376 #endif /* !defined (TARGET_PPC64) */
377
378 int kvm_arch_init_vcpu(CPUPPCState *cenv)
379 {
380 PowerPCCPU *cpu = ppc_env_get_cpu(cenv);
381 int ret;
382
383 /* Gather server mmu info from KVM and update the CPU state */
384 kvm_fixup_page_sizes(cenv);
385
386 /* Synchronize sregs with kvm */
387 ret = kvm_arch_sync_sregs(cenv);
388 if (ret) {
389 return ret;
390 }
391
392 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);
393
394 /* Some targets support access to KVM's guest TLB. */
395 switch (cenv->mmu_model) {
396 case POWERPC_MMU_BOOKE206:
397 ret = kvm_booke206_tlb_init(cenv);
398 break;
399 default:
400 break;
401 }
402
403 return ret;
404 }
405
406 void kvm_arch_reset_vcpu(CPUPPCState *env)
407 {
408 }
409
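/* Push QEMU's shadow copy of the guest TLB back into KVM by marking every
 * entry dirty. */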
410 static void kvm_sw_tlb_put(CPUPPCState *env)
411 {
412 struct kvm_dirty_tlb dirty_tlb;
413 unsigned char *bitmap;
414 int ret;
415
416 if (!env->kvm_sw_tlb) {
417 return;
418 }
419
420 bitmap = g_malloc((env->nb_tlb + 7) / 8);
421 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
422
423 dirty_tlb.bitmap = (uintptr_t)bitmap;
424 dirty_tlb.num_dirty = env->nb_tlb;
425
426 ret = kvm_vcpu_ioctl(env, KVM_DIRTY_TLB, &dirty_tlb);
427 if (ret) {
428 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
429 __func__, strerror(-ret));
430 }
431
432 g_free(bitmap);
433 }
434
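/* Copy QEMU's vcpu state into KVM: GPRs and common SPRs always; SLB/SR/BAT
 * state and HIOR only on full syncs (level >= KVM_PUT_RESET_STATE) when the
 * kernel supports them. */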
435 int kvm_arch_put_registers(CPUPPCState *env, int level)
436 {
437 struct kvm_regs regs;
438 int ret;
439 int i;
440
441 ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
442 if (ret < 0)
443 return ret;
444
445 regs.ctr = env->ctr;
446 regs.lr = env->lr;
447 regs.xer = env->xer;
448 regs.msr = env->msr;
449 regs.pc = env->nip;
450
451 regs.srr0 = env->spr[SPR_SRR0];
452 regs.srr1 = env->spr[SPR_SRR1];
453
454 regs.sprg0 = env->spr[SPR_SPRG0];
455 regs.sprg1 = env->spr[SPR_SPRG1];
456 regs.sprg2 = env->spr[SPR_SPRG2];
457 regs.sprg3 = env->spr[SPR_SPRG3];
458 regs.sprg4 = env->spr[SPR_SPRG4];
459 regs.sprg5 = env->spr[SPR_SPRG5];
460 regs.sprg6 = env->spr[SPR_SPRG6];
461 regs.sprg7 = env->spr[SPR_SPRG7];
462
463 regs.pid = env->spr[SPR_BOOKE_PID];
464
465 for (i = 0; i < 32; i++)
466 regs.gpr[i] = env->gpr[i];
467
468 ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
469 if (ret < 0)
470 return ret;
471
472 if (env->tlb_dirty) {
473 kvm_sw_tlb_put(env);
474 env->tlb_dirty = false;
475 }
476
477 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
478 struct kvm_sregs sregs;
479
480 sregs.pvr = env->spr[SPR_PVR];
481
482 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
483
484 /* Sync SLB */
485 #ifdef TARGET_PPC64
486 for (i = 0; i < 64; i++) {
487 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
488 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
489 }
490 #endif
491
492 /* Sync SRs */
493 for (i = 0; i < 16; i++) {
494 sregs.u.s.ppc32.sr[i] = env->sr[i];
495 }
496
497 /* Sync BATs */
498 for (i = 0; i < 8; i++) {
499 /* Beware. We have to swap upper and lower bits here */
500 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
501 | env->DBAT[1][i];
502 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
503 | env->IBAT[1][i];
504 }
505
506 ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
507 if (ret) {
508 return ret;
509 }
510 }
511
512 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
513 uint64_t hior = env->spr[SPR_HIOR];
514 struct kvm_one_reg reg = {
515 .id = KVM_REG_PPC_HIOR,
516 .addr = (uintptr_t) &hior,
517 };
518
519 ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
520 if (ret) {
521 return ret;
522 }
523 }
524
525 return ret;
526 }
527
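/* Read the vcpu state back from KVM, including BookE or Book3S sregs
 * depending on which capability the kernel advertises. */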
528 int kvm_arch_get_registers(CPUPPCState *env)
529 {
530 struct kvm_regs regs;
531 struct kvm_sregs sregs;
532 uint32_t cr;
533 int i, ret;
534
535 ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
536 if (ret < 0)
537 return ret;
538
539 cr = regs.cr;
540 for (i = 7; i >= 0; i--) {
541 env->crf[i] = cr & 15;
542 cr >>= 4;
543 }
544
545 env->ctr = regs.ctr;
546 env->lr = regs.lr;
547 env->xer = regs.xer;
548 env->msr = regs.msr;
549 env->nip = regs.pc;
550
551 env->spr[SPR_SRR0] = regs.srr0;
552 env->spr[SPR_SRR1] = regs.srr1;
553
554 env->spr[SPR_SPRG0] = regs.sprg0;
555 env->spr[SPR_SPRG1] = regs.sprg1;
556 env->spr[SPR_SPRG2] = regs.sprg2;
557 env->spr[SPR_SPRG3] = regs.sprg3;
558 env->spr[SPR_SPRG4] = regs.sprg4;
559 env->spr[SPR_SPRG5] = regs.sprg5;
560 env->spr[SPR_SPRG6] = regs.sprg6;
561 env->spr[SPR_SPRG7] = regs.sprg7;
562
563 env->spr[SPR_BOOKE_PID] = regs.pid;
564
565 for (i = 0; i < 32; i++)
566 env->gpr[i] = regs.gpr[i];
567
568 if (cap_booke_sregs) {
569 ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
570 if (ret < 0) {
571 return ret;
572 }
573
574 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
575 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
576 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
577 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
578 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
579 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
580 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
581 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
582 env->spr[SPR_DECR] = sregs.u.e.dec;
583 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
584 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
585 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
586 }
587
588 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
589 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
590 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
591 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
592 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
593 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
594 }
595
596 if (sregs.u.e.features & KVM_SREGS_E_64) {
597 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
598 }
599
600 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
601 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
602 }
603
604 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
605 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
606 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
607 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
608 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
609 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
610 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
611 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
612 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
613 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
614 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
615 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
616 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
617 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
618 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
619 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
620 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
621
622 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
623 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
624 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
625 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
626 }
627
628 if (sregs.u.e.features & KVM_SREGS_E_PM) {
629 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
630 }
631
632 if (sregs.u.e.features & KVM_SREGS_E_PC) {
633 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
634 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
635 }
636 }
637
638 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
639 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
640 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
641 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
642 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
643 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
644 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
645 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
646 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
647 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
648 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
649 }
650
651 if (sregs.u.e.features & KVM_SREGS_EXP) {
652 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
653 }
654
655 if (sregs.u.e.features & KVM_SREGS_E_PD) {
656 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
657 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
658 }
659
660 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
661 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
662 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
663 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
664
665 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
666 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
667 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
668 }
669 }
670 }
671
672 if (cap_segstate) {
673 ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
674 if (ret < 0) {
675 return ret;
676 }
677
678 ppc_store_sdr1(env, sregs.u.s.sdr1);
679
680 /* Sync SLB */
681 #ifdef TARGET_PPC64
682 for (i = 0; i < 64; i++) {
683 ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
684 sregs.u.s.ppc64.slb[i].slbv);
685 }
686 #endif
687
688 /* Sync SRs */
689 for (i = 0; i < 16; i++) {
690 env->sr[i] = sregs.u.s.ppc32.sr[i];
691 }
692
693 /* Sync BATs */
694 for (i = 0; i < 8; i++) {
695 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
696 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
697 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
698 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
699 }
700 }
701
702 return 0;
703 }
704
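/* Forward a change on the external interrupt line to KVM via KVM_INTERRUPT;
 * other interrupt sources are not handled here. */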
705 int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
706 {
707 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
708
709 if (irq != PPC_INTERRUPT_EXT) {
710 return 0;
711 }
712
713 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
714 return 0;
715 }
716
717 kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);
718
719 return 0;
720 }
721
722 #if defined(TARGET_PPCEMB)
723 #define PPC_INPUT_INT PPC40x_INPUT_INT
724 #elif defined(TARGET_PPC64)
725 #define PPC_INPUT_INT PPC970_INPUT_INT
726 #else
727 #define PPC_INPUT_INT PPC6xx_INPUT_INT
728 #endif
729
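/* Called before entering KVM_RUN. On kernels without level-irq support,
 * inject any pending external interrupt by hand and arm idle_timer so a
 * swallowed interrupt gets retried. */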
730 void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
731 {
732 int r;
733 unsigned irq;
734
735 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
736 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
737 if (!cap_interrupt_level &&
738 run->ready_for_interrupt_injection &&
739 (env->interrupt_request & CPU_INTERRUPT_HARD) &&
740 (env->irq_input_state & (1<<PPC_INPUT_INT)))
741 {
742 /* For now KVM disregards the 'irq' argument. However, in the
743 * future KVM could cache it in-kernel to avoid a heavyweight exit
744 * when reading the UIC.
745 */
746 irq = KVM_INTERRUPT_SET;
747
748 dprintf("injected interrupt %d\n", irq);
749 r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
750 if (r < 0)
751 printf("cpu %d fail inject %x\n", env->cpu_index, irq);
752
753 /* Always wake up soon in case the interrupt was level based */
754 qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
755 (get_ticks_per_sec() / 50));
756 }
757
758 /* We don't know if there are more interrupts pending after this. However,
759 * the guest will return to userspace in the course of handling this one
760 * anyway, so we will get a chance to deliver the rest. */
761 }
762
763 void kvm_arch_post_run(CPUPPCState *env, struct kvm_run *run)
764 {
765 }
766
767 int kvm_arch_process_async_events(CPUPPCState *env)
768 {
769 return env->halted;
770 }
771
772 static int kvmppc_handle_halt(CPUPPCState *env)
773 {
774 if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
775 env->halted = 1;
776 env->exception_index = EXCP_HLT;
777 }
778
779 return 0;
780 }
781
782 /* map dcr access to existing qemu dcr emulation */
783 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
784 {
785 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
786 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
787
788 return 0;
789 }
790
791 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
792 {
793 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
794 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
795
796 return 0;
797 }
798
799 int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run)
800 {
801 int ret;
802
803 switch (run->exit_reason) {
804 case KVM_EXIT_DCR:
805 if (run->dcr.is_write) {
806 dprintf("handle dcr write\n");
807 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
808 } else {
809 dprintf("handle dcr read\n");
810 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
811 }
812 break;
813 case KVM_EXIT_HLT:
814 dprintf("handle halt\n");
815 ret = kvmppc_handle_halt(env);
816 break;
817 #ifdef CONFIG_PSERIES
818 case KVM_EXIT_PAPR_HCALL:
819 dprintf("handle PAPR hypercall\n");
820 run->papr_hcall.ret = spapr_hypercall(env, run->papr_hcall.nr,
821 run->papr_hcall.args);
822 ret = 0;
823 break;
824 #endif
825 default:
826 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
827 ret = -1;
828 break;
829 }
830
831 return ret;
832 }
833
834 static int read_cpuinfo(const char *field, char *value, int len)
835 {
836 FILE *f;
837 int ret = -1;
838 int field_len = strlen(field);
839 char line[512];
840
841 f = fopen("/proc/cpuinfo", "r");
842 if (!f) {
843 return -1;
844 }
845
846 do {
847 if (!fgets(line, sizeof(line), f)) {
848 break;
849 }
850 if (!strncmp(line, field, field_len)) {
851 pstrcpy(value, len, line);
852 ret = 0;
853 break;
854 }
855 } while (*line);
856
857 fclose(f);
858
859 return ret;
860 }
861
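/* Host timebase frequency, parsed from /proc/cpuinfo; falls back to
 * get_ticks_per_sec() if it can't be determined. */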
862 uint32_t kvmppc_get_tbfreq(void)
863 {
864 char line[512];
865 char *ns;
866 uint32_t retval = get_ticks_per_sec();
867
868 if (read_cpuinfo("timebase", line, sizeof(line))) {
869 return retval;
870 }
871
872 if (!(ns = strchr(line, ':'))) {
873 return retval;
874 }
875
876 ns++;
877
878 retval = atoi(ns);
879 return retval;
880 }
881
882 /* Try to find a device tree node for a CPU with clock-frequency property */
883 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
884 {
885 struct dirent *dirp;
886 DIR *dp;
887
888 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
889 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
890 return -1;
891 }
892
893 buf[0] = '\0';
894 while ((dirp = readdir(dp)) != NULL) {
895 FILE *f;
896 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
897 dirp->d_name);
898 f = fopen(buf, "r");
899 if (f) {
900 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
901 fclose(f);
902 break;
903 }
904 buf[0] = '\0';
905 }
906 closedir(dp);
907 if (buf[0] == '\0') {
908 printf("Unknown host!\n");
909 return -1;
910 }
911
912 return 0;
913 }
914
915 /* Read a CPU node property from the host device tree that's a single
916 * integer (32-bit or 64-bit). Returns -1 if the property can't be
917 * found or opened, and 0 if the value isn't in a recognized
918 * format. */
919 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
920 {
921 char buf[PATH_MAX];
922 union {
923 uint32_t v32;
924 uint64_t v64;
925 } u;
926 FILE *f;
927 int len;
928
929 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
930 return -1;
931 }
932
933 strncat(buf, "/", sizeof(buf) - strlen(buf) - 1);
934 strncat(buf, propname, sizeof(buf) - strlen(buf) - 1);
935
936 f = fopen(buf, "rb");
937 if (!f) {
938 return -1;
939 }
940
941 len = fread(&u, 1, sizeof(u), f);
942 fclose(f);
943 switch (len) {
944 case 4:
945 /* property is a 32-bit quantity */
946 return be32_to_cpu(u.v32);
947 case 8:
948 return be64_to_cpu(u.v64);
949 }
950
951 return 0;
952 }
953
954 uint64_t kvmppc_get_clockfreq(void)
955 {
956 return kvmppc_read_int_cpu_dt("clock-frequency");
957 }
958
959 uint32_t kvmppc_get_vmx(void)
960 {
961 return kvmppc_read_int_cpu_dt("ibm,vmx");
962 }
963
964 uint32_t kvmppc_get_dfp(void)
965 {
966 return kvmppc_read_int_cpu_dt("ibm,dfp");
967 }
968
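/* Fill buf with the hypercall instruction sequence the guest should use
 * (from KVM_PPC_GET_PVINFO), or with a stub that always fails (li r3,-1)
 * if the kernel can't tell us. */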
969 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
970 {
971 uint32_t *hc = (uint32_t*)buf;
972
973 struct kvm_ppc_pvinfo pvinfo;
974
975 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
976 !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
977 memcpy(buf, pvinfo.hcall, buf_len);
978
979 return 0;
980 }
981
982 /*
983 * Fallback to always fail hypercalls:
984 *
985 * li r3, -1
986 * nop
987 * nop
988 * nop
989 */
990
991 hc[0] = 0x3860ffff;
992 hc[1] = 0x60000000;
993 hc[2] = 0x60000000;
994 hc[3] = 0x60000000;
995
996 return 0;
997 }
998
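/* Put the vcpu into PAPR (pSeries) mode via KVM_ENABLE_CAP; aborts if the
 * kernel lacks KVM_CAP_PPC_PAPR. */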
999 void kvmppc_set_papr(CPUPPCState *env)
1000 {
1001 struct kvm_enable_cap cap = {};
1002 int ret;
1003
1004 cap.cap = KVM_CAP_PPC_PAPR;
1005 ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);
1006
1007 if (ret) {
1008 cpu_abort(env, "This KVM version does not support PAPR\n");
1009 }
1010 }
1011
1012 int kvmppc_smt_threads(void)
1013 {
1014 return cap_ppc_smt ? cap_ppc_smt : 1;
1015 }
1016
1017 #ifdef TARGET_PPC64
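/* Allocate and map a contiguous Real Mode Area via KVM_ALLOCATE_RMA on hosts
 * that require one (cap_ppc_rma == 2). Returns the RMA size, 0 when no RMA
 * needs to be allocated here, or -1 on failure. */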
1018 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1019 {
1020 void *rma;
1021 off_t size;
1022 int fd;
1023 struct kvm_allocate_rma ret;
1024 MemoryRegion *rma_region;
1025
1026 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1027 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1028 * not necessary on this hardware
1029 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1030 *
1031 * FIXME: We should allow the user to force contiguous RMA
1032 * allocation in the cap_ppc_rma==1 case.
1033 */
1034 if (cap_ppc_rma < 2) {
1035 return 0;
1036 }
1037
1038 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1039 if (fd < 0) {
1040 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1041 strerror(errno));
1042 return -1;
1043 }
1044
1045 size = MIN(ret.rma_size, 256ul << 20);
1046
1047 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1048 if (rma == MAP_FAILED) {
1049 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1050 return -1;
1051 }
1052
1053 rma_region = g_new(MemoryRegion, 1);
1054 memory_region_init_ram_ptr(rma_region, name, size, rma);
1055 vmstate_register_ram_global(rma_region);
1056 memory_region_add_subregion(sysmem, 0, rma_region);
1057
1058 return size;
1059 }
1060
1061 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1062 {
1063 if (cap_ppc_rma >= 2) {
1064 return current_size;
1065 }
1066 return MIN(current_size,
1067 getrampagesize() << (hash_shift - 7));
1068 }
1069 #endif
1070
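/* Create an in-kernel TCE table for the given LIOBN and mmap it into QEMU;
 * returns NULL (with *pfd = -1) when that isn't possible, so callers fall
 * back to a userspace table. */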
1071 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1072 {
1073 struct kvm_create_spapr_tce args = {
1074 .liobn = liobn,
1075 .window_size = window_size,
1076 };
1077 long len;
1078 int fd;
1079 void *table;
1080
1081 /* Must set fd to -1 so we don't try to munmap when called for
1082 * destroying the table, which the upper layers -will- do
1083 */
1084 *pfd = -1;
1085 if (!cap_spapr_tce) {
1086 return NULL;
1087 }
1088
1089 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1090 if (fd < 0) {
1091 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1092 liobn);
1093 return NULL;
1094 }
1095
1096 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1097 /* FIXME: round this up to page size */
1098
1099 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1100 if (table == MAP_FAILED) {
1101 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1102 liobn);
1103 close(fd);
1104 return NULL;
1105 }
1106
1107 *pfd = fd;
1108 return table;
1109 }
1110
1111 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1112 {
1113 long len;
1114
1115 if (fd < 0) {
1116 return -1;
1117 }
1118
1119 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1120 if ((munmap(table, len) < 0) ||
1121 (close(fd) < 0)) {
1122 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
1123 strerror(errno));
1124 /* Leak the table */
1125 }
1126
1127 return 0;
1128 }
1129
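/* Ask the kernel to (re)allocate the guest hash page table. Returns the htab
 * order actually used, or 0 when QEMU should allocate the htab itself. */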
1130 int kvmppc_reset_htab(int shift_hint)
1131 {
1132 uint32_t shift = shift_hint;
1133
1134 if (!kvm_enabled()) {
1135 /* Full emulation, tell caller to allocate htab itself */
1136 return 0;
1137 }
1138 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1139 int ret;
1140 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1141 if (ret == -ENOTTY) {
1142 /* At least some versions of PR KVM advertise the
1143 * capability, but don't implement the ioctl(). Oops.
1144 * Return 0 so that we allocate the htab in qemu, as is
1145 * correct for PR. */
1146 return 0;
1147 } else if (ret < 0) {
1148 return ret;
1149 }
1150 return shift;
1151 }
1152
1153 /* We have a kernel that predates the htab reset calls. For PR
1154 * KVM, we need to allocate the htab ourselves; an HV KVM of
1155 * this era will have allocated a 16MB fixed-size hash table
1156 * already. Kernels of this era have the GET_PVINFO capability
1157 * only on PR, so we use this hack to determine the right
1158 * answer. */
1159 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1160 /* PR - tell caller to allocate htab */
1161 return 0;
1162 } else {
1163 /* HV - assume 16MB kernel allocated htab */
1164 return 24;
1165 }
1166 }
1167
1168 static inline uint32_t mfpvr(void)
1169 {
1170 uint32_t pvr;
1171
1172 asm ("mfpvr %0"
1173 : "=r"(pvr));
1174 return pvr;
1175 }
1176
1177 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
1178 {
1179 if (on) {
1180 *word |= flags;
1181 } else {
1182 *word &= ~flags;
1183 }
1184 }
1185
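/* Build a CPU definition describing the host: start from the PVR-matched
 * model, then fix up Altivec/VSX and DFP availability from the device tree. */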
1186 const ppc_def_t *kvmppc_host_cpu_def(void)
1187 {
1188 uint32_t host_pvr = mfpvr();
1189 const ppc_def_t *base_spec;
1190 ppc_def_t *spec;
1191 uint32_t vmx = kvmppc_get_vmx();
1192 uint32_t dfp = kvmppc_get_dfp();
1193
1194 base_spec = ppc_find_by_pvr(host_pvr);
1195
1196 spec = g_malloc0(sizeof(*spec));
1197 memcpy(spec, base_spec, sizeof(*spec));
1198
1199 /* Now fix up the spec with information we can query from the host */
1200
1201 if (vmx != -1) {
1202 /* Only override when we know what the host supports */
1203 alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
1204 alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
1205 }
1206 if (dfp != -1) {
1207 /* Only override when we know what the host supports */
1208 alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
1209 }
1210
1211 return spec;
1212 }
1213
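/* Renumber cpu_index so that each core occupies a full block of
 * kvmppc_smt_threads() vcpu ids, as the kernel expects. */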
1214 int kvmppc_fixup_cpu(CPUPPCState *env)
1215 {
1216 int smt;
1217
1218 /* Adjust cpu index for SMT */
1219 smt = kvmppc_smt_threads();
1220 env->cpu_index = (env->cpu_index / smp_threads) * smt
1221 + (env->cpu_index % smp_threads);
1222
1223 return 0;
1224 }
1225
1226
1227 bool kvm_arch_stop_on_emulation_error(CPUPPCState *env)
1228 {
1229 return true;
1230 }
1231
1232 int kvm_arch_on_sigbus_vcpu(CPUPPCState *env, int code, void *addr)
1233 {
1234 return 1;
1235 }
1236
1237 int kvm_arch_on_sigbus(int code, void *addr)
1238 {
1239 return 1;
1240 }