]> git.proxmox.com Git - qemu.git/blob - target-ppc/kvm.c
target-ppc: Rename kvm_kick_{env => cpu} and pass PowerPCCPU
[qemu.git] / target-ppc / kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
22
23 #include <linux/kvm.h>
24
25 #include "qemu-common.h"
26 #include "qemu-timer.h"
27 #include "sysemu.h"
28 #include "kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "cpus.h"
32 #include "device_tree.h"
33 #include "hw/sysbus.h"
34 #include "hw/spapr.h"
35
36 #include "hw/sysbus.h"
37 #include "hw/spapr.h"
38 #include "hw/spapr_vio.h"
39
/* Uncomment to send dprintf() tracing to stderr. */
/* #define DEBUG_KVM */

#if defined(DEBUG_KVM)
#define dprintf(fmt, ...)                         \
    do {                                          \
        fprintf(stderr, fmt, ## __VA_ARGS__);     \
    } while (0)
#else
#define dprintf(fmt, ...)                         \
    do {                                          \
    } while (0)
#endif

/* Directory scanned by kvmppc_find_cpu_dt() when looking for a CPU node. */
#define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
51
52 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
53 KVM_CAP_LAST_INFO
54 };
55
56 static int cap_interrupt_unset = false;
57 static int cap_interrupt_level = false;
58 static int cap_segstate;
59 static int cap_booke_sregs;
60 static int cap_ppc_smt;
61 static int cap_ppc_rma;
62 static int cap_spapr_tce;
63 static int cap_hior;
64
65 /* XXX We have a race condition where we actually have a level triggered
66 * interrupt, but the infrastructure can't expose that yet, so the guest
67 * takes but ignores it, goes to sleep and never gets notified that there's
68 * still an interrupt pending.
69 *
70 * As a quick workaround, let's just wake up again 20 ms after we injected
71 * an interrupt. That way we can assure that we're always reinjecting
72 * interrupts in case the guest swallowed them.
73 */
74 static QEMUTimer *idle_timer;
75
76 static void kvm_kick_cpu(void *opaque)
77 {
78 PowerPCCPU *cpu = opaque;
79 CPUPPCState *env = &cpu->env;
80
81 qemu_cpu_kick(env);
82 }
83
84 int kvm_arch_init(KVMState *s)
85 {
86 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
87 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
88 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
89 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
90 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
91 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
92 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
93 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
94
95 if (!cap_interrupt_level) {
96 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
97 "VM to stall at times!\n");
98 }
99
100 return 0;
101 }
102
103 static int kvm_arch_sync_sregs(CPUPPCState *cenv)
104 {
105 struct kvm_sregs sregs;
106 int ret;
107
108 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
109 /* What we're really trying to say is "if we're on BookE, we use
110 the native PVR for now". This is the only sane way to check
111 it though, so we potentially confuse users that they can run
112 BookE guests on BookS. Let's hope nobody dares enough :) */
113 return 0;
114 } else {
115 if (!cap_segstate) {
116 fprintf(stderr, "kvm error: missing PVR setting capability\n");
117 return -ENOSYS;
118 }
119 }
120
121 ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
122 if (ret) {
123 return ret;
124 }
125
126 sregs.pvr = cenv->spr[SPR_PVR];
127 return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
128 }
129
130 /* Set up a shared TLB array with KVM */
131 static int kvm_booke206_tlb_init(CPUPPCState *env)
132 {
133 struct kvm_book3e_206_tlb_params params = {};
134 struct kvm_config_tlb cfg = {};
135 struct kvm_enable_cap encap = {};
136 unsigned int entries = 0;
137 int ret, i;
138
139 if (!kvm_enabled() ||
140 !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
141 return 0;
142 }
143
144 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
145
146 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
147 params.tlb_sizes[i] = booke206_tlb_size(env, i);
148 params.tlb_ways[i] = booke206_tlb_ways(env, i);
149 entries += params.tlb_sizes[i];
150 }
151
152 assert(entries == env->nb_tlb);
153 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
154
155 env->tlb_dirty = true;
156
157 cfg.array = (uintptr_t)env->tlb.tlbm;
158 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
159 cfg.params = (uintptr_t)&params;
160 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
161
162 encap.cap = KVM_CAP_SW_TLB;
163 encap.args[0] = (uintptr_t)&cfg;
164
165 ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &encap);
166 if (ret < 0) {
167 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
168 __func__, strerror(-ret));
169 return ret;
170 }
171
172 env->kvm_sw_tlb = true;
173 return 0;
174 }
175
176
177 #if defined(TARGET_PPC64)
178 static void kvm_get_fallback_smmu_info(CPUPPCState *env,
179 struct kvm_ppc_smmu_info *info)
180 {
181 memset(info, 0, sizeof(*info));
182
183 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
184 * need to "guess" what the supported page sizes are.
185 *
186 * For that to work we make a few assumptions:
187 *
188 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
189 * KVM which only supports 4K and 16M pages, but supports them
190 * regardless of the backing store characteritics. We also don't
191 * support 1T segments.
192 *
193 * This is safe as if HV KVM ever supports that capability or PR
194 * KVM grows supports for more page/segment sizes, those versions
195 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
196 * will not hit this fallback
197 *
198 * - Else we are running HV KVM. This means we only support page
199 * sizes that fit in the backing store. Additionally we only
200 * advertize 64K pages if the processor is ARCH 2.06 and we assume
201 * P7 encodings for the SLB and hash table. Here too, we assume
202 * support for any newer processor will mean a kernel that
203 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
204 * this fallback.
205 */
206 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
207 /* No flags */
208 info->flags = 0;
209 info->slb_size = 64;
210
211 /* Standard 4k base page size segment */
212 info->sps[0].page_shift = 12;
213 info->sps[0].slb_enc = 0;
214 info->sps[0].enc[0].page_shift = 12;
215 info->sps[0].enc[0].pte_enc = 0;
216
217 /* Standard 16M large page size segment */
218 info->sps[1].page_shift = 24;
219 info->sps[1].slb_enc = SLB_VSID_L;
220 info->sps[1].enc[0].page_shift = 24;
221 info->sps[1].enc[0].pte_enc = 0;
222 } else {
223 int i = 0;
224
225 /* HV KVM has backing store size restrictions */
226 info->flags = KVM_PPC_PAGE_SIZES_REAL;
227
228 if (env->mmu_model & POWERPC_MMU_1TSEG) {
229 info->flags |= KVM_PPC_1T_SEGMENTS;
230 }
231
232 if (env->mmu_model == POWERPC_MMU_2_06) {
233 info->slb_size = 32;
234 } else {
235 info->slb_size = 64;
236 }
237
238 /* Standard 4k base page size segment */
239 info->sps[i].page_shift = 12;
240 info->sps[i].slb_enc = 0;
241 info->sps[i].enc[0].page_shift = 12;
242 info->sps[i].enc[0].pte_enc = 0;
243 i++;
244
245 /* 64K on MMU 2.06 */
246 if (env->mmu_model == POWERPC_MMU_2_06) {
247 info->sps[i].page_shift = 16;
248 info->sps[i].slb_enc = 0x110;
249 info->sps[i].enc[0].page_shift = 16;
250 info->sps[i].enc[0].pte_enc = 1;
251 i++;
252 }
253
254 /* Standard 16M large page size segment */
255 info->sps[i].page_shift = 24;
256 info->sps[i].slb_enc = SLB_VSID_L;
257 info->sps[i].enc[0].page_shift = 24;
258 info->sps[i].enc[0].pte_enc = 0;
259 }
260 }
261
262 static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
263 {
264 int ret;
265
266 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
267 ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
268 if (ret == 0) {
269 return;
270 }
271 }
272
273 kvm_get_fallback_smmu_info(env, info);
274 }
275
276 static long getrampagesize(void)
277 {
278 struct statfs fs;
279 int ret;
280
281 if (!mem_path) {
282 /* guest RAM is backed by normal anonymous pages */
283 return getpagesize();
284 }
285
286 do {
287 ret = statfs(mem_path, &fs);
288 } while (ret != 0 && errno == EINTR);
289
290 if (ret != 0) {
291 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
292 strerror(errno));
293 exit(1);
294 }
295
296 #define HUGETLBFS_MAGIC 0x958458f6
297
298 if (fs.f_type != HUGETLBFS_MAGIC) {
299 /* Explicit mempath, but it's ordinary pages */
300 return getpagesize();
301 }
302
303 /* It's hugepage, return the huge page size */
304 return fs.f_bsize;
305 }
306
307 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
308 {
309 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
310 return true;
311 }
312
313 return (1ul << shift) <= rampgsize;
314 }
315
316 static void kvm_fixup_page_sizes(CPUPPCState *env)
317 {
318 static struct kvm_ppc_smmu_info smmu_info;
319 static bool has_smmu_info;
320 long rampagesize;
321 int iq, ik, jq, jk;
322
323 /* We only handle page sizes for 64-bit server guests for now */
324 if (!(env->mmu_model & POWERPC_MMU_64)) {
325 return;
326 }
327
328 /* Collect MMU info from kernel if not already */
329 if (!has_smmu_info) {
330 kvm_get_smmu_info(env, &smmu_info);
331 has_smmu_info = true;
332 }
333
334 rampagesize = getrampagesize();
335
336 /* Convert to QEMU form */
337 memset(&env->sps, 0, sizeof(env->sps));
338
339 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
340 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
341 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
342
343 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
344 ksps->page_shift)) {
345 continue;
346 }
347 qsps->page_shift = ksps->page_shift;
348 qsps->slb_enc = ksps->slb_enc;
349 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
350 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
351 ksps->enc[jk].page_shift)) {
352 continue;
353 }
354 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
355 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
356 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
357 break;
358 }
359 }
360 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
361 break;
362 }
363 }
364 env->slb_nr = smmu_info.slb_size;
365 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
366 env->mmu_model |= POWERPC_MMU_1TSEG;
367 } else {
368 env->mmu_model &= ~POWERPC_MMU_1TSEG;
369 }
370 }
371 #else /* defined (TARGET_PPC64) */
372
373 static inline void kvm_fixup_page_sizes(CPUPPCState *env)
374 {
375 }
376
377 #endif /* !defined (TARGET_PPC64) */
378
379 int kvm_arch_init_vcpu(CPUPPCState *cenv)
380 {
381 PowerPCCPU *cpu = ppc_env_get_cpu(cenv);
382 int ret;
383
384 /* Gather server mmu info from KVM and update the CPU state */
385 kvm_fixup_page_sizes(cenv);
386
387 /* Synchronize sregs with kvm */
388 ret = kvm_arch_sync_sregs(cenv);
389 if (ret) {
390 return ret;
391 }
392
393 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);
394
395 /* Some targets support access to KVM's guest TLB. */
396 switch (cenv->mmu_model) {
397 case POWERPC_MMU_BOOKE206:
398 ret = kvm_booke206_tlb_init(cenv);
399 break;
400 default:
401 break;
402 }
403
404 return ret;
405 }
406
407 void kvm_arch_reset_vcpu(CPUPPCState *env)
408 {
409 }
410
411 static void kvm_sw_tlb_put(CPUPPCState *env)
412 {
413 struct kvm_dirty_tlb dirty_tlb;
414 unsigned char *bitmap;
415 int ret;
416
417 if (!env->kvm_sw_tlb) {
418 return;
419 }
420
421 bitmap = g_malloc((env->nb_tlb + 7) / 8);
422 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
423
424 dirty_tlb.bitmap = (uintptr_t)bitmap;
425 dirty_tlb.num_dirty = env->nb_tlb;
426
427 ret = kvm_vcpu_ioctl(env, KVM_DIRTY_TLB, &dirty_tlb);
428 if (ret) {
429 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
430 __func__, strerror(-ret));
431 }
432
433 g_free(bitmap);
434 }
435
436 int kvm_arch_put_registers(CPUPPCState *env, int level)
437 {
438 struct kvm_regs regs;
439 int ret;
440 int i;
441
442 ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
443 if (ret < 0)
444 return ret;
445
446 regs.ctr = env->ctr;
447 regs.lr = env->lr;
448 regs.xer = env->xer;
449 regs.msr = env->msr;
450 regs.pc = env->nip;
451
452 regs.srr0 = env->spr[SPR_SRR0];
453 regs.srr1 = env->spr[SPR_SRR1];
454
455 regs.sprg0 = env->spr[SPR_SPRG0];
456 regs.sprg1 = env->spr[SPR_SPRG1];
457 regs.sprg2 = env->spr[SPR_SPRG2];
458 regs.sprg3 = env->spr[SPR_SPRG3];
459 regs.sprg4 = env->spr[SPR_SPRG4];
460 regs.sprg5 = env->spr[SPR_SPRG5];
461 regs.sprg6 = env->spr[SPR_SPRG6];
462 regs.sprg7 = env->spr[SPR_SPRG7];
463
464 regs.pid = env->spr[SPR_BOOKE_PID];
465
466 for (i = 0;i < 32; i++)
467 regs.gpr[i] = env->gpr[i];
468
469 ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
470 if (ret < 0)
471 return ret;
472
473 if (env->tlb_dirty) {
474 kvm_sw_tlb_put(env);
475 env->tlb_dirty = false;
476 }
477
478 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
479 struct kvm_sregs sregs;
480
481 sregs.pvr = env->spr[SPR_PVR];
482
483 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
484
485 /* Sync SLB */
486 #ifdef TARGET_PPC64
487 for (i = 0; i < 64; i++) {
488 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
489 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
490 }
491 #endif
492
493 /* Sync SRs */
494 for (i = 0; i < 16; i++) {
495 sregs.u.s.ppc32.sr[i] = env->sr[i];
496 }
497
498 /* Sync BATs */
499 for (i = 0; i < 8; i++) {
500 /* Beware. We have to swap upper and lower bits here */
501 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
502 | env->DBAT[1][i];
503 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
504 | env->IBAT[1][i];
505 }
506
507 ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
508 if (ret) {
509 return ret;
510 }
511 }
512
513 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
514 uint64_t hior = env->spr[SPR_HIOR];
515 struct kvm_one_reg reg = {
516 .id = KVM_REG_PPC_HIOR,
517 .addr = (uintptr_t) &hior,
518 };
519
520 ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
521 if (ret) {
522 return ret;
523 }
524 }
525
526 return ret;
527 }
528
529 int kvm_arch_get_registers(CPUPPCState *env)
530 {
531 struct kvm_regs regs;
532 struct kvm_sregs sregs;
533 uint32_t cr;
534 int i, ret;
535
536 ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
537 if (ret < 0)
538 return ret;
539
540 cr = regs.cr;
541 for (i = 7; i >= 0; i--) {
542 env->crf[i] = cr & 15;
543 cr >>= 4;
544 }
545
546 env->ctr = regs.ctr;
547 env->lr = regs.lr;
548 env->xer = regs.xer;
549 env->msr = regs.msr;
550 env->nip = regs.pc;
551
552 env->spr[SPR_SRR0] = regs.srr0;
553 env->spr[SPR_SRR1] = regs.srr1;
554
555 env->spr[SPR_SPRG0] = regs.sprg0;
556 env->spr[SPR_SPRG1] = regs.sprg1;
557 env->spr[SPR_SPRG2] = regs.sprg2;
558 env->spr[SPR_SPRG3] = regs.sprg3;
559 env->spr[SPR_SPRG4] = regs.sprg4;
560 env->spr[SPR_SPRG5] = regs.sprg5;
561 env->spr[SPR_SPRG6] = regs.sprg6;
562 env->spr[SPR_SPRG7] = regs.sprg7;
563
564 env->spr[SPR_BOOKE_PID] = regs.pid;
565
566 for (i = 0;i < 32; i++)
567 env->gpr[i] = regs.gpr[i];
568
569 if (cap_booke_sregs) {
570 ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
571 if (ret < 0) {
572 return ret;
573 }
574
575 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
576 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
577 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
578 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
579 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
580 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
581 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
582 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
583 env->spr[SPR_DECR] = sregs.u.e.dec;
584 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
585 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
586 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
587 }
588
589 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
590 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
591 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
592 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
593 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
594 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
595 }
596
597 if (sregs.u.e.features & KVM_SREGS_E_64) {
598 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
599 }
600
601 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
602 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
603 }
604
605 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
606 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
607 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
608 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
609 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
610 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
611 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
612 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
613 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
614 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
615 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
616 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
617 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
618 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
619 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
620 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
621 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
622
623 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
624 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
625 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
626 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
627 }
628
629 if (sregs.u.e.features & KVM_SREGS_E_PM) {
630 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
631 }
632
633 if (sregs.u.e.features & KVM_SREGS_E_PC) {
634 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
635 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
636 }
637 }
638
639 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
640 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
641 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
642 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
643 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
644 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
645 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
646 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
647 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
648 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
649 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
650 }
651
652 if (sregs.u.e.features & KVM_SREGS_EXP) {
653 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
654 }
655
656 if (sregs.u.e.features & KVM_SREGS_E_PD) {
657 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
658 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
659 }
660
661 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
662 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
663 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
664 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
665
666 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
667 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
668 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
669 }
670 }
671 }
672
673 if (cap_segstate) {
674 ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
675 if (ret < 0) {
676 return ret;
677 }
678
679 ppc_store_sdr1(env, sregs.u.s.sdr1);
680
681 /* Sync SLB */
682 #ifdef TARGET_PPC64
683 for (i = 0; i < 64; i++) {
684 ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
685 sregs.u.s.ppc64.slb[i].slbv);
686 }
687 #endif
688
689 /* Sync SRs */
690 for (i = 0; i < 16; i++) {
691 env->sr[i] = sregs.u.s.ppc32.sr[i];
692 }
693
694 /* Sync BATs */
695 for (i = 0; i < 8; i++) {
696 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
697 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
698 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
699 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
700 }
701 }
702
703 return 0;
704 }
705
706 int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
707 {
708 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
709
710 if (irq != PPC_INTERRUPT_EXT) {
711 return 0;
712 }
713
714 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
715 return 0;
716 }
717
718 kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);
719
720 return 0;
721 }
722
723 #if defined(TARGET_PPCEMB)
724 #define PPC_INPUT_INT PPC40x_INPUT_INT
725 #elif defined(TARGET_PPC64)
726 #define PPC_INPUT_INT PPC970_INPUT_INT
727 #else
728 #define PPC_INPUT_INT PPC6xx_INPUT_INT
729 #endif
730
731 void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
732 {
733 int r;
734 unsigned irq;
735
736 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
737 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
738 if (!cap_interrupt_level &&
739 run->ready_for_interrupt_injection &&
740 (env->interrupt_request & CPU_INTERRUPT_HARD) &&
741 (env->irq_input_state & (1<<PPC_INPUT_INT)))
742 {
743 /* For now KVM disregards the 'irq' argument. However, in the
744 * future KVM could cache it in-kernel to avoid a heavyweight exit
745 * when reading the UIC.
746 */
747 irq = KVM_INTERRUPT_SET;
748
749 dprintf("injected interrupt %d\n", irq);
750 r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
751 if (r < 0)
752 printf("cpu %d fail inject %x\n", env->cpu_index, irq);
753
754 /* Always wake up soon in case the interrupt was level based */
755 qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
756 (get_ticks_per_sec() / 50));
757 }
758
759 /* We don't know if there are more interrupts pending after this. However,
760 * the guest will return to userspace in the course of handling this one
761 * anyways, so we will get a chance to deliver the rest. */
762 }
763
764 void kvm_arch_post_run(CPUPPCState *env, struct kvm_run *run)
765 {
766 }
767
768 int kvm_arch_process_async_events(CPUPPCState *env)
769 {
770 return env->halted;
771 }
772
773 static int kvmppc_handle_halt(CPUPPCState *env)
774 {
775 if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
776 env->halted = 1;
777 env->exception_index = EXCP_HLT;
778 }
779
780 return 0;
781 }
782
783 /* map dcr access to existing qemu dcr emulation */
784 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
785 {
786 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
787 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
788
789 return 0;
790 }
791
792 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
793 {
794 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
795 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
796
797 return 0;
798 }
799
800 int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run)
801 {
802 int ret;
803
804 switch (run->exit_reason) {
805 case KVM_EXIT_DCR:
806 if (run->dcr.is_write) {
807 dprintf("handle dcr write\n");
808 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
809 } else {
810 dprintf("handle dcr read\n");
811 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
812 }
813 break;
814 case KVM_EXIT_HLT:
815 dprintf("handle halt\n");
816 ret = kvmppc_handle_halt(env);
817 break;
818 #ifdef CONFIG_PSERIES
819 case KVM_EXIT_PAPR_HCALL:
820 dprintf("handle PAPR hypercall\n");
821 run->papr_hcall.ret = spapr_hypercall(env, run->papr_hcall.nr,
822 run->papr_hcall.args);
823 ret = 0;
824 break;
825 #endif
826 default:
827 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
828 ret = -1;
829 break;
830 }
831
832 return ret;
833 }
834
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy
 * it (the whole line) into @value, which holds @len bytes.
 *
 * Returns 0 when a line was found, -1 when the file cannot be opened or
 * no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int ret = -1;
    FILE *f = fopen("/proc/cpuinfo", "r");

    if (f == NULL) {
        return -1;
    }

    while (fgets(line, sizeof(line), f) != NULL) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
        /* An empty string from fgets() also ends the scan. */
        if (line[0] == '\0') {
            break;
        }
    }

    fclose(f);

    return ret;
}
862
/*
 * Return the host timebase frequency parsed from the "timebase" line of
 * /proc/cpuinfo, or get_ticks_per_sec() when that line is missing or has
 * no ':' separator.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *colon;
    uint32_t fallback = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line)) != 0) {
        return fallback;
    }

    colon = strchr(line, ':');
    if (colon == NULL) {
        return fallback;
    }

    /* The number follows the colon. */
    return atoi(colon + 1);
}
882
883 /* Try to find a device tree node for a CPU with clock-frequency property */
884 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
885 {
886 struct dirent *dirp;
887 DIR *dp;
888
889 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
890 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
891 return -1;
892 }
893
894 buf[0] = '\0';
895 while ((dirp = readdir(dp)) != NULL) {
896 FILE *f;
897 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
898 dirp->d_name);
899 f = fopen(buf, "r");
900 if (f) {
901 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
902 fclose(f);
903 break;
904 }
905 buf[0] = '\0';
906 }
907 closedir(dp);
908 if (buf[0] == '\0') {
909 printf("Unknown host!\n");
910 return -1;
911 }
912
913 return 0;
914 }
915
916 /* Read a CPU node property from the host device tree that's a single
917 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
918 * (can't find or open the property, or doesn't understand the
919 * format) */
920 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
921 {
922 char buf[PATH_MAX];
923 union {
924 uint32_t v32;
925 uint64_t v64;
926 } u;
927 FILE *f;
928 int len;
929
930 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
931 return -1;
932 }
933
934 strncat(buf, "/", sizeof(buf) - strlen(buf));
935 strncat(buf, propname, sizeof(buf) - strlen(buf));
936
937 f = fopen(buf, "rb");
938 if (!f) {
939 return -1;
940 }
941
942 len = fread(&u, 1, sizeof(u), f);
943 fclose(f);
944 switch (len) {
945 case 4:
946 /* property is a 32-bit quantity */
947 return be32_to_cpu(u.v32);
948 case 8:
949 return be64_to_cpu(u.v64);
950 }
951
952 return 0;
953 }
954
/* Host CPU "clock-frequency" device-tree property, as returned by
 * kvmppc_read_int_cpu_dt(). */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
959
/* Host CPU "ibm,vmx" device-tree property truncated to 32 bits; a -1 from
 * kvmppc_read_int_cpu_dt() therefore reads back as (uint32_t)-1. */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,vmx");

    return prop;
}
964
/* Host CPU "ibm,dfp" device-tree property truncated to 32 bits; a -1 from
 * kvmppc_read_int_cpu_dt() therefore reads back as (uint32_t)-1. */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,dfp");

    return prop;
}
969
970 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
971 {
972 uint32_t *hc = (uint32_t*)buf;
973
974 struct kvm_ppc_pvinfo pvinfo;
975
976 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
977 !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
978 memcpy(buf, pvinfo.hcall, buf_len);
979
980 return 0;
981 }
982
983 /*
984 * Fallback to always fail hypercalls:
985 *
986 * li r3, -1
987 * nop
988 * nop
989 * nop
990 */
991
992 hc[0] = 0x3860ffff;
993 hc[1] = 0x60000000;
994 hc[2] = 0x60000000;
995 hc[3] = 0x60000000;
996
997 return 0;
998 }
999
1000 void kvmppc_set_papr(CPUPPCState *env)
1001 {
1002 struct kvm_enable_cap cap = {};
1003 int ret;
1004
1005 cap.cap = KVM_CAP_PPC_PAPR;
1006 ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);
1007
1008 if (ret) {
1009 cpu_abort(env, "This KVM version does not support PAPR\n");
1010 }
1011 }
1012
1013 int kvmppc_smt_threads(void)
1014 {
1015 return cap_ppc_smt ? cap_ppc_smt : 1;
1016 }
1017
1018 #ifdef TARGET_PPC64
1019 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1020 {
1021 void *rma;
1022 off_t size;
1023 int fd;
1024 struct kvm_allocate_rma ret;
1025 MemoryRegion *rma_region;
1026
1027 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1028 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1029 * not necessary on this hardware
1030 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1031 *
1032 * FIXME: We should allow the user to force contiguous RMA
1033 * allocation in the cap_ppc_rma==1 case.
1034 */
1035 if (cap_ppc_rma < 2) {
1036 return 0;
1037 }
1038
1039 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1040 if (fd < 0) {
1041 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1042 strerror(errno));
1043 return -1;
1044 }
1045
1046 size = MIN(ret.rma_size, 256ul << 20);
1047
1048 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1049 if (rma == MAP_FAILED) {
1050 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1051 return -1;
1052 };
1053
1054 rma_region = g_new(MemoryRegion, 1);
1055 memory_region_init_ram_ptr(rma_region, name, size, rma);
1056 vmstate_register_ram_global(rma_region);
1057 memory_region_add_subregion(sysmem, 0, rma_region);
1058
1059 return size;
1060 }
1061
1062 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1063 {
1064 if (cap_ppc_rma >= 2) {
1065 return current_size;
1066 }
1067 return MIN(current_size,
1068 getrampagesize() << (hash_shift - 7));
1069 }
1070 #endif
1071
1072 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1073 {
1074 struct kvm_create_spapr_tce args = {
1075 .liobn = liobn,
1076 .window_size = window_size,
1077 };
1078 long len;
1079 int fd;
1080 void *table;
1081
1082 /* Must set fd to -1 so we don't try to munmap when called for
1083 * destroying the table, which the upper layers -will- do
1084 */
1085 *pfd = -1;
1086 if (!cap_spapr_tce) {
1087 return NULL;
1088 }
1089
1090 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1091 if (fd < 0) {
1092 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1093 liobn);
1094 return NULL;
1095 }
1096
1097 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1098 /* FIXME: round this up to page size */
1099
1100 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1101 if (table == MAP_FAILED) {
1102 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1103 liobn);
1104 close(fd);
1105 return NULL;
1106 }
1107
1108 *pfd = fd;
1109 return table;
1110 }
1111
1112 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1113 {
1114 long len;
1115
1116 if (fd < 0) {
1117 return -1;
1118 }
1119
1120 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
1121 if ((munmap(table, len) < 0) ||
1122 (close(fd) < 0)) {
1123 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1124 strerror(errno));
1125 /* Leak the table */
1126 }
1127
1128 return 0;
1129 }
1130
1131 int kvmppc_reset_htab(int shift_hint)
1132 {
1133 uint32_t shift = shift_hint;
1134
1135 if (!kvm_enabled()) {
1136 /* Full emulation, tell caller to allocate htab itself */
1137 return 0;
1138 }
1139 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1140 int ret;
1141 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1142 if (ret == -ENOTTY) {
1143 /* At least some versions of PR KVM advertise the
1144 * capability, but don't implement the ioctl(). Oops.
1145 * Return 0 so that we allocate the htab in qemu, as is
1146 * correct for PR. */
1147 return 0;
1148 } else if (ret < 0) {
1149 return ret;
1150 }
1151 return shift;
1152 }
1153
1154 /* We have a kernel that predates the htab reset calls. For PR
1155 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1156 * this era, it has allocated a 16MB fixed size hash table
1157 * already. Kernels of this era have the GET_PVINFO capability
1158 * only on PR, so we use this hack to determine the right
1159 * answer */
1160 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1161 /* PR - tell caller to allocate htab */
1162 return 0;
1163 } else {
1164 /* HV - assume 16MB kernel allocated htab */
1165 return 24;
1166 }
1167 }
1168
1169 static inline uint32_t mfpvr(void)
1170 {
1171 uint32_t pvr;
1172
1173 asm ("mfpvr %0"
1174 : "=r"(pvr));
1175 return pvr;
1176 }
1177
/* Set (@on true) or clear (@on false) the bits of @flags in *@word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
1186
1187 const ppc_def_t *kvmppc_host_cpu_def(void)
1188 {
1189 uint32_t host_pvr = mfpvr();
1190 const ppc_def_t *base_spec;
1191 ppc_def_t *spec;
1192 uint32_t vmx = kvmppc_get_vmx();
1193 uint32_t dfp = kvmppc_get_dfp();
1194
1195 base_spec = ppc_find_by_pvr(host_pvr);
1196
1197 spec = g_malloc0(sizeof(*spec));
1198 memcpy(spec, base_spec, sizeof(*spec));
1199
1200 /* Now fix up the spec with information we can query from the host */
1201
1202 if (vmx != -1) {
1203 /* Only override when we know what the host supports */
1204 alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
1205 alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
1206 }
1207 if (dfp != -1) {
1208 /* Only override when we know what the host supports */
1209 alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
1210 }
1211
1212 return spec;
1213 }
1214
1215 int kvmppc_fixup_cpu(CPUPPCState *env)
1216 {
1217 int smt;
1218
1219 /* Adjust cpu index for SMT */
1220 smt = kvmppc_smt_threads();
1221 env->cpu_index = (env->cpu_index / smp_threads) * smt
1222 + (env->cpu_index % smp_threads);
1223
1224 return 0;
1225 }
1226
1227
1228 bool kvm_arch_stop_on_emulation_error(CPUPPCState *env)
1229 {
1230 return true;
1231 }
1232
1233 int kvm_arch_on_sigbus_vcpu(CPUPPCState *env, int code, void *addr)
1234 {
1235 return 1;
1236 }
1237
/* Process-wide SIGBUS hook; does no handling and always returns 1. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    const int rc = 1;

    return rc;
}