]> git.proxmox.com Git - qemu.git/blob - target-ppc/kvm.c
PPC: KVM: Fix BAT put
[qemu.git] / target-ppc / kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
22
23 #include <linux/kvm.h>
24
25 #include "qemu-common.h"
26 #include "qemu-timer.h"
27 #include "sysemu.h"
28 #include "kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "cpus.h"
32 #include "device_tree.h"
33 #include "hw/sysbus.h"
34 #include "hw/spapr.h"
35
36 #include "hw/sysbus.h"
37 #include "hw/spapr.h"
38 #include "hw/spapr_vio.h"
39
/* Uncomment the next line to get KVM debug traces on stderr. */
//#define DEBUG_KVM

/*
 * dprintf(): debug trace helper.
 *
 * Without DEBUG_KVM it expands to an empty statement (its arguments are
 * not evaluated); with DEBUG_KVM it forwards to fprintf(stderr, ...).
 */
#ifndef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { } while (0)
#else
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#endif
49
50 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
51
52 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
53 KVM_CAP_LAST_INFO
54 };
55
56 static int cap_interrupt_unset = false;
57 static int cap_interrupt_level = false;
58 static int cap_segstate;
59 static int cap_booke_sregs;
60 static int cap_ppc_smt;
61 static int cap_ppc_rma;
62 static int cap_spapr_tce;
63 static int cap_hior;
64
65 /* XXX We have a race condition where we actually have a level triggered
66 * interrupt, but the infrastructure can't expose that yet, so the guest
67 * takes but ignores it, goes to sleep and never gets notified that there's
68 * still an interrupt pending.
69 *
70 * As a quick workaround, let's just wake up again 20 ms after we injected
71 * an interrupt. That way we can assure that we're always reinjecting
72 * interrupts in case the guest swallowed them.
73 */
74 static QEMUTimer *idle_timer;
75
/*
 * Callback registered with idle_timer in kvm_arch_init_vcpu(): kicks the
 * CPU whose state pointer was passed as the opaque argument.
 */
static void kvm_kick_env(void *env)
{
    qemu_cpu_kick(env);
}
80
81 int kvm_arch_init(KVMState *s)
82 {
83 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
84 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
85 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
86 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
87 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
88 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
89 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
90 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
91
92 if (!cap_interrupt_level) {
93 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
94 "VM to stall at times!\n");
95 }
96
97 return 0;
98 }
99
100 static int kvm_arch_sync_sregs(CPUPPCState *cenv)
101 {
102 struct kvm_sregs sregs;
103 int ret;
104
105 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
106 /* What we're really trying to say is "if we're on BookE, we use
107 the native PVR for now". This is the only sane way to check
108 it though, so we potentially confuse users that they can run
109 BookE guests on BookS. Let's hope nobody dares enough :) */
110 return 0;
111 } else {
112 if (!cap_segstate) {
113 fprintf(stderr, "kvm error: missing PVR setting capability\n");
114 return -ENOSYS;
115 }
116 }
117
118 ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
119 if (ret) {
120 return ret;
121 }
122
123 sregs.pvr = cenv->spr[SPR_PVR];
124 return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
125 }
126
127 /* Set up a shared TLB array with KVM */
128 static int kvm_booke206_tlb_init(CPUPPCState *env)
129 {
130 struct kvm_book3e_206_tlb_params params = {};
131 struct kvm_config_tlb cfg = {};
132 struct kvm_enable_cap encap = {};
133 unsigned int entries = 0;
134 int ret, i;
135
136 if (!kvm_enabled() ||
137 !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
138 return 0;
139 }
140
141 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
142
143 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
144 params.tlb_sizes[i] = booke206_tlb_size(env, i);
145 params.tlb_ways[i] = booke206_tlb_ways(env, i);
146 entries += params.tlb_sizes[i];
147 }
148
149 assert(entries == env->nb_tlb);
150 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
151
152 env->tlb_dirty = true;
153
154 cfg.array = (uintptr_t)env->tlb.tlbm;
155 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
156 cfg.params = (uintptr_t)&params;
157 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
158
159 encap.cap = KVM_CAP_SW_TLB;
160 encap.args[0] = (uintptr_t)&cfg;
161
162 ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &encap);
163 if (ret < 0) {
164 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
165 __func__, strerror(-ret));
166 return ret;
167 }
168
169 env->kvm_sw_tlb = true;
170 return 0;
171 }
172
173
174 #if defined(TARGET_PPC64)
175 static void kvm_get_fallback_smmu_info(CPUPPCState *env,
176 struct kvm_ppc_smmu_info *info)
177 {
178 memset(info, 0, sizeof(*info));
179
180 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
181 * need to "guess" what the supported page sizes are.
182 *
183 * For that to work we make a few assumptions:
184 *
185 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
186 * KVM which only supports 4K and 16M pages, but supports them
187 * regardless of the backing store characteritics. We also don't
188 * support 1T segments.
189 *
190 * This is safe as if HV KVM ever supports that capability or PR
191 * KVM grows supports for more page/segment sizes, those versions
192 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
193 * will not hit this fallback
194 *
195 * - Else we are running HV KVM. This means we only support page
196 * sizes that fit in the backing store. Additionally we only
197 * advertize 64K pages if the processor is ARCH 2.06 and we assume
198 * P7 encodings for the SLB and hash table. Here too, we assume
199 * support for any newer processor will mean a kernel that
200 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
201 * this fallback.
202 */
203 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
204 /* No flags */
205 info->flags = 0;
206 info->slb_size = 64;
207
208 /* Standard 4k base page size segment */
209 info->sps[0].page_shift = 12;
210 info->sps[0].slb_enc = 0;
211 info->sps[0].enc[0].page_shift = 12;
212 info->sps[0].enc[0].pte_enc = 0;
213
214 /* Standard 16M large page size segment */
215 info->sps[1].page_shift = 24;
216 info->sps[1].slb_enc = SLB_VSID_L;
217 info->sps[1].enc[0].page_shift = 24;
218 info->sps[1].enc[0].pte_enc = 0;
219 } else {
220 int i = 0;
221
222 /* HV KVM has backing store size restrictions */
223 info->flags = KVM_PPC_PAGE_SIZES_REAL;
224
225 if (env->mmu_model & POWERPC_MMU_1TSEG) {
226 info->flags |= KVM_PPC_1T_SEGMENTS;
227 }
228
229 if (env->mmu_model == POWERPC_MMU_2_06) {
230 info->slb_size = 32;
231 } else {
232 info->slb_size = 64;
233 }
234
235 /* Standard 4k base page size segment */
236 info->sps[i].page_shift = 12;
237 info->sps[i].slb_enc = 0;
238 info->sps[i].enc[0].page_shift = 12;
239 info->sps[i].enc[0].pte_enc = 0;
240 i++;
241
242 /* 64K on MMU 2.06 */
243 if (env->mmu_model == POWERPC_MMU_2_06) {
244 info->sps[i].page_shift = 16;
245 info->sps[i].slb_enc = 0x110;
246 info->sps[i].enc[0].page_shift = 16;
247 info->sps[i].enc[0].pte_enc = 1;
248 i++;
249 }
250
251 /* Standard 16M large page size segment */
252 info->sps[i].page_shift = 24;
253 info->sps[i].slb_enc = SLB_VSID_L;
254 info->sps[i].enc[0].page_shift = 24;
255 info->sps[i].enc[0].pte_enc = 0;
256 }
257 }
258
259 static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
260 {
261 int ret;
262
263 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
264 ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
265 if (ret == 0) {
266 return;
267 }
268 }
269
270 kvm_get_fallback_smmu_info(env, info);
271 }
272
273 static long getrampagesize(void)
274 {
275 struct statfs fs;
276 int ret;
277
278 if (!mem_path) {
279 /* guest RAM is backed by normal anonymous pages */
280 return getpagesize();
281 }
282
283 do {
284 ret = statfs(mem_path, &fs);
285 } while (ret != 0 && errno == EINTR);
286
287 if (ret != 0) {
288 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
289 strerror(errno));
290 exit(1);
291 }
292
293 #define HUGETLBFS_MAGIC 0x958458f6
294
295 if (fs.f_type != HUGETLBFS_MAGIC) {
296 /* Explicit mempath, but it's ordinary pages */
297 return getpagesize();
298 }
299
300 /* It's hugepage, return the huge page size */
301 return fs.f_bsize;
302 }
303
304 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
305 {
306 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
307 return true;
308 }
309
310 return (1ul << shift) <= rampgsize;
311 }
312
313 static void kvm_fixup_page_sizes(CPUPPCState *env)
314 {
315 static struct kvm_ppc_smmu_info smmu_info;
316 static bool has_smmu_info;
317 long rampagesize;
318 int iq, ik, jq, jk;
319
320 /* We only handle page sizes for 64-bit server guests for now */
321 if (!(env->mmu_model & POWERPC_MMU_64)) {
322 return;
323 }
324
325 /* Collect MMU info from kernel if not already */
326 if (!has_smmu_info) {
327 kvm_get_smmu_info(env, &smmu_info);
328 has_smmu_info = true;
329 }
330
331 rampagesize = getrampagesize();
332
333 /* Convert to QEMU form */
334 memset(&env->sps, 0, sizeof(env->sps));
335
336 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
337 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
338 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
339
340 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
341 ksps->page_shift)) {
342 continue;
343 }
344 qsps->page_shift = ksps->page_shift;
345 qsps->slb_enc = ksps->slb_enc;
346 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
347 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
348 ksps->enc[jk].page_shift)) {
349 continue;
350 }
351 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
352 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
353 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
354 break;
355 }
356 }
357 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
358 break;
359 }
360 }
361 env->slb_nr = smmu_info.slb_size;
362 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
363 env->mmu_model |= POWERPC_MMU_1TSEG;
364 } else {
365 env->mmu_model &= ~POWERPC_MMU_1TSEG;
366 }
367 }
368 #else /* defined (TARGET_PPC64) */
369
370 static inline void kvm_fixup_page_sizes(CPUPPCState *env)
371 {
372 }
373
374 #endif /* !defined (TARGET_PPC64) */
375
376 int kvm_arch_init_vcpu(CPUPPCState *cenv)
377 {
378 int ret;
379
380 /* Gather server mmu info from KVM and update the CPU state */
381 kvm_fixup_page_sizes(cenv);
382
383 /* Synchronize sregs with kvm */
384 ret = kvm_arch_sync_sregs(cenv);
385 if (ret) {
386 return ret;
387 }
388
389 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_env, cenv);
390
391 /* Some targets support access to KVM's guest TLB. */
392 switch (cenv->mmu_model) {
393 case POWERPC_MMU_BOOKE206:
394 ret = kvm_booke206_tlb_init(cenv);
395 break;
396 default:
397 break;
398 }
399
400 return ret;
401 }
402
403 void kvm_arch_reset_vcpu(CPUPPCState *env)
404 {
405 }
406
407 static void kvm_sw_tlb_put(CPUPPCState *env)
408 {
409 struct kvm_dirty_tlb dirty_tlb;
410 unsigned char *bitmap;
411 int ret;
412
413 if (!env->kvm_sw_tlb) {
414 return;
415 }
416
417 bitmap = g_malloc((env->nb_tlb + 7) / 8);
418 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
419
420 dirty_tlb.bitmap = (uintptr_t)bitmap;
421 dirty_tlb.num_dirty = env->nb_tlb;
422
423 ret = kvm_vcpu_ioctl(env, KVM_DIRTY_TLB, &dirty_tlb);
424 if (ret) {
425 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
426 __func__, strerror(-ret));
427 }
428
429 g_free(bitmap);
430 }
431
432 int kvm_arch_put_registers(CPUPPCState *env, int level)
433 {
434 struct kvm_regs regs;
435 int ret;
436 int i;
437
438 ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
439 if (ret < 0)
440 return ret;
441
442 regs.ctr = env->ctr;
443 regs.lr = env->lr;
444 regs.xer = env->xer;
445 regs.msr = env->msr;
446 regs.pc = env->nip;
447
448 regs.srr0 = env->spr[SPR_SRR0];
449 regs.srr1 = env->spr[SPR_SRR1];
450
451 regs.sprg0 = env->spr[SPR_SPRG0];
452 regs.sprg1 = env->spr[SPR_SPRG1];
453 regs.sprg2 = env->spr[SPR_SPRG2];
454 regs.sprg3 = env->spr[SPR_SPRG3];
455 regs.sprg4 = env->spr[SPR_SPRG4];
456 regs.sprg5 = env->spr[SPR_SPRG5];
457 regs.sprg6 = env->spr[SPR_SPRG6];
458 regs.sprg7 = env->spr[SPR_SPRG7];
459
460 regs.pid = env->spr[SPR_BOOKE_PID];
461
462 for (i = 0;i < 32; i++)
463 regs.gpr[i] = env->gpr[i];
464
465 ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
466 if (ret < 0)
467 return ret;
468
469 if (env->tlb_dirty) {
470 kvm_sw_tlb_put(env);
471 env->tlb_dirty = false;
472 }
473
474 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
475 struct kvm_sregs sregs;
476
477 sregs.pvr = env->spr[SPR_PVR];
478
479 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
480
481 /* Sync SLB */
482 #ifdef TARGET_PPC64
483 for (i = 0; i < 64; i++) {
484 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
485 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
486 }
487 #endif
488
489 /* Sync SRs */
490 for (i = 0; i < 16; i++) {
491 sregs.u.s.ppc32.sr[i] = env->sr[i];
492 }
493
494 /* Sync BATs */
495 for (i = 0; i < 8; i++) {
496 /* Beware. We have to swap upper and lower bits here */
497 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
498 | env->DBAT[1][i];
499 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
500 | env->IBAT[1][i];
501 }
502
503 ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
504 if (ret) {
505 return ret;
506 }
507 }
508
509 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
510 uint64_t hior = env->spr[SPR_HIOR];
511 struct kvm_one_reg reg = {
512 .id = KVM_REG_PPC_HIOR,
513 .addr = (uintptr_t) &hior,
514 };
515
516 ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
517 if (ret) {
518 return ret;
519 }
520 }
521
522 return ret;
523 }
524
525 int kvm_arch_get_registers(CPUPPCState *env)
526 {
527 struct kvm_regs regs;
528 struct kvm_sregs sregs;
529 uint32_t cr;
530 int i, ret;
531
532 ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
533 if (ret < 0)
534 return ret;
535
536 cr = regs.cr;
537 for (i = 7; i >= 0; i--) {
538 env->crf[i] = cr & 15;
539 cr >>= 4;
540 }
541
542 env->ctr = regs.ctr;
543 env->lr = regs.lr;
544 env->xer = regs.xer;
545 env->msr = regs.msr;
546 env->nip = regs.pc;
547
548 env->spr[SPR_SRR0] = regs.srr0;
549 env->spr[SPR_SRR1] = regs.srr1;
550
551 env->spr[SPR_SPRG0] = regs.sprg0;
552 env->spr[SPR_SPRG1] = regs.sprg1;
553 env->spr[SPR_SPRG2] = regs.sprg2;
554 env->spr[SPR_SPRG3] = regs.sprg3;
555 env->spr[SPR_SPRG4] = regs.sprg4;
556 env->spr[SPR_SPRG5] = regs.sprg5;
557 env->spr[SPR_SPRG6] = regs.sprg6;
558 env->spr[SPR_SPRG7] = regs.sprg7;
559
560 env->spr[SPR_BOOKE_PID] = regs.pid;
561
562 for (i = 0;i < 32; i++)
563 env->gpr[i] = regs.gpr[i];
564
565 if (cap_booke_sregs) {
566 ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
567 if (ret < 0) {
568 return ret;
569 }
570
571 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
572 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
573 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
574 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
575 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
576 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
577 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
578 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
579 env->spr[SPR_DECR] = sregs.u.e.dec;
580 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
581 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
582 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
583 }
584
585 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
586 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
587 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
588 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
589 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
590 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
591 }
592
593 if (sregs.u.e.features & KVM_SREGS_E_64) {
594 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
595 }
596
597 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
598 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
599 }
600
601 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
602 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
603 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
604 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
605 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
606 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
607 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
608 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
609 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
610 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
611 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
612 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
613 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
614 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
615 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
616 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
617 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
618
619 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
620 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
621 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
622 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
623 }
624
625 if (sregs.u.e.features & KVM_SREGS_E_PM) {
626 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
627 }
628
629 if (sregs.u.e.features & KVM_SREGS_E_PC) {
630 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
631 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
632 }
633 }
634
635 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
636 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
637 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
638 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
639 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
640 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
641 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
642 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
643 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
644 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
645 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
646 }
647
648 if (sregs.u.e.features & KVM_SREGS_EXP) {
649 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
650 }
651
652 if (sregs.u.e.features & KVM_SREGS_E_PD) {
653 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
654 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
655 }
656
657 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
658 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
659 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
660 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
661
662 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
663 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
664 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
665 }
666 }
667 }
668
669 if (cap_segstate) {
670 ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
671 if (ret < 0) {
672 return ret;
673 }
674
675 ppc_store_sdr1(env, sregs.u.s.sdr1);
676
677 /* Sync SLB */
678 #ifdef TARGET_PPC64
679 for (i = 0; i < 64; i++) {
680 ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
681 sregs.u.s.ppc64.slb[i].slbv);
682 }
683 #endif
684
685 /* Sync SRs */
686 for (i = 0; i < 16; i++) {
687 env->sr[i] = sregs.u.s.ppc32.sr[i];
688 }
689
690 /* Sync BATs */
691 for (i = 0; i < 8; i++) {
692 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
693 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
694 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
695 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
696 }
697 }
698
699 return 0;
700 }
701
702 int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
703 {
704 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
705
706 if (irq != PPC_INTERRUPT_EXT) {
707 return 0;
708 }
709
710 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
711 return 0;
712 }
713
714 kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);
715
716 return 0;
717 }
718
/*
 * PPC_INPUT_INT: the input pin constant tested in kvm_arch_pre_run(),
 * picked per build target (TARGET_PPCEMB takes precedence over
 * TARGET_PPC64).
 */
#if !defined(TARGET_PPCEMB) && !defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#elif defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#else
#define PPC_INPUT_INT PPC970_INPUT_INT
#endif
726
727 void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
728 {
729 int r;
730 unsigned irq;
731
732 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
733 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
734 if (!cap_interrupt_level &&
735 run->ready_for_interrupt_injection &&
736 (env->interrupt_request & CPU_INTERRUPT_HARD) &&
737 (env->irq_input_state & (1<<PPC_INPUT_INT)))
738 {
739 /* For now KVM disregards the 'irq' argument. However, in the
740 * future KVM could cache it in-kernel to avoid a heavyweight exit
741 * when reading the UIC.
742 */
743 irq = KVM_INTERRUPT_SET;
744
745 dprintf("injected interrupt %d\n", irq);
746 r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
747 if (r < 0)
748 printf("cpu %d fail inject %x\n", env->cpu_index, irq);
749
750 /* Always wake up soon in case the interrupt was level based */
751 qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
752 (get_ticks_per_sec() / 50));
753 }
754
755 /* We don't know if there are more interrupts pending after this. However,
756 * the guest will return to userspace in the course of handling this one
757 * anyways, so we will get a chance to deliver the rest. */
758 }
759
760 void kvm_arch_post_run(CPUPPCState *env, struct kvm_run *run)
761 {
762 }
763
764 int kvm_arch_process_async_events(CPUPPCState *env)
765 {
766 return env->halted;
767 }
768
769 static int kvmppc_handle_halt(CPUPPCState *env)
770 {
771 if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
772 env->halted = 1;
773 env->exception_index = EXCP_HLT;
774 }
775
776 return 0;
777 }
778
779 /* map dcr access to existing qemu dcr emulation */
780 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
781 {
782 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
783 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
784
785 return 0;
786 }
787
788 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
789 {
790 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
791 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
792
793 return 0;
794 }
795
796 int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run)
797 {
798 int ret;
799
800 switch (run->exit_reason) {
801 case KVM_EXIT_DCR:
802 if (run->dcr.is_write) {
803 dprintf("handle dcr write\n");
804 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
805 } else {
806 dprintf("handle dcr read\n");
807 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
808 }
809 break;
810 case KVM_EXIT_HLT:
811 dprintf("handle halt\n");
812 ret = kvmppc_handle_halt(env);
813 break;
814 #ifdef CONFIG_PSERIES
815 case KVM_EXIT_PAPR_HCALL:
816 dprintf("handle PAPR hypercall\n");
817 run->papr_hcall.ret = spapr_hypercall(env, run->papr_hcall.nr,
818 run->papr_hcall.args);
819 ret = 0;
820 break;
821 #endif
822 default:
823 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
824 ret = -1;
825 break;
826 }
827
828 return ret;
829 }
830
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy
 * it (the whole line, truncated if necessary) into @value.
 *
 * @field: prefix to look for at the start of a line
 * @value: destination buffer; always NUL-terminated on success
 * @len:   size of @value in bytes
 *
 * Returns 0 when a matching line was found, -1 otherwise (including when
 * /proc/cpuinfo cannot be opened or @len is not positive).
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    size_t field_len = strlen(field);
    char line[512];

    if (len <= 0) {
        return -1;
    }

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    while (fgets(line, sizeof(line), f)) {
        if (!strncmp(line, field, field_len)) {
            /* snprintf() always terminates the copy, unlike strncpy(),
             * which leaves @value unterminated when the line is at least
             * @len characters long. */
            snprintf(value, len, "%s", line);
            ret = 0;
            break;
        }
    }

    fclose(f);

    return ret;
}
858
/*
 * Return the number that follows the ':' on the "timebase" line of
 * /proc/cpuinfo.  Falls back to get_ticks_per_sec() when that line is
 * missing or contains no ':'.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *colon;
    uint32_t fallback = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line)) != 0) {
        return fallback;
    }

    colon = strchr(line, ':');
    if (colon == NULL) {
        return fallback;
    }

    /* The value starts right after the colon. */
    return atoi(colon + 1);
}
878
879 /* Try to find a device tree node for a CPU with clock-frequency property */
880 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
881 {
882 struct dirent *dirp;
883 DIR *dp;
884
885 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
886 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
887 return -1;
888 }
889
890 buf[0] = '\0';
891 while ((dirp = readdir(dp)) != NULL) {
892 FILE *f;
893 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
894 dirp->d_name);
895 f = fopen(buf, "r");
896 if (f) {
897 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
898 fclose(f);
899 break;
900 }
901 buf[0] = '\0';
902 }
903 closedir(dp);
904 if (buf[0] == '\0') {
905 printf("Unknown host!\n");
906 return -1;
907 }
908
909 return 0;
910 }
911
912 /* Read a CPU node property from the host device tree that's a single
913 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
914 * (can't find or open the property, or doesn't understand the
915 * format) */
916 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
917 {
918 char buf[PATH_MAX];
919 union {
920 uint32_t v32;
921 uint64_t v64;
922 } u;
923 FILE *f;
924 int len;
925
926 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
927 return -1;
928 }
929
930 strncat(buf, "/", sizeof(buf) - strlen(buf));
931 strncat(buf, propname, sizeof(buf) - strlen(buf));
932
933 f = fopen(buf, "rb");
934 if (!f) {
935 return -1;
936 }
937
938 len = fread(&u, 1, sizeof(u), f);
939 fclose(f);
940 switch (len) {
941 case 4:
942 /* property is a 32-bit quantity */
943 return be32_to_cpu(u.v32);
944 case 8:
945 return be64_to_cpu(u.v64);
946 }
947
948 return 0;
949 }
950
/* Value of the host CPU node's "clock-frequency" device tree property,
 * as returned by kvmppc_read_int_cpu_dt(). */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
955
/* Value of the host CPU node's "ibm,vmx" device tree property, as
 * returned by kvmppc_read_int_cpu_dt() and truncated to 32 bits. */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,vmx");

    return prop;
}
960
/* Value of the host CPU node's "ibm,dfp" device tree property, as
 * returned by kvmppc_read_int_cpu_dt() and truncated to 32 bits. */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,dfp");

    return prop;
}
965
966 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
967 {
968 uint32_t *hc = (uint32_t*)buf;
969
970 struct kvm_ppc_pvinfo pvinfo;
971
972 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
973 !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
974 memcpy(buf, pvinfo.hcall, buf_len);
975
976 return 0;
977 }
978
979 /*
980 * Fallback to always fail hypercalls:
981 *
982 * li r3, -1
983 * nop
984 * nop
985 * nop
986 */
987
988 hc[0] = 0x3860ffff;
989 hc[1] = 0x60000000;
990 hc[2] = 0x60000000;
991 hc[3] = 0x60000000;
992
993 return 0;
994 }
995
996 void kvmppc_set_papr(CPUPPCState *env)
997 {
998 struct kvm_enable_cap cap = {};
999 int ret;
1000
1001 cap.cap = KVM_CAP_PPC_PAPR;
1002 ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);
1003
1004 if (ret) {
1005 cpu_abort(env, "This KVM version does not support PAPR\n");
1006 }
1007 }
1008
1009 int kvmppc_smt_threads(void)
1010 {
1011 return cap_ppc_smt ? cap_ppc_smt : 1;
1012 }
1013
1014 #ifdef TARGET_PPC64
1015 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1016 {
1017 void *rma;
1018 off_t size;
1019 int fd;
1020 struct kvm_allocate_rma ret;
1021 MemoryRegion *rma_region;
1022
1023 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1024 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1025 * not necessary on this hardware
1026 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1027 *
1028 * FIXME: We should allow the user to force contiguous RMA
1029 * allocation in the cap_ppc_rma==1 case.
1030 */
1031 if (cap_ppc_rma < 2) {
1032 return 0;
1033 }
1034
1035 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1036 if (fd < 0) {
1037 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1038 strerror(errno));
1039 return -1;
1040 }
1041
1042 size = MIN(ret.rma_size, 256ul << 20);
1043
1044 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1045 if (rma == MAP_FAILED) {
1046 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1047 return -1;
1048 };
1049
1050 rma_region = g_new(MemoryRegion, 1);
1051 memory_region_init_ram_ptr(rma_region, name, size, rma);
1052 vmstate_register_ram_global(rma_region);
1053 memory_region_add_subregion(sysmem, 0, rma_region);
1054
1055 return size;
1056 }
1057
1058 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1059 {
1060 if (cap_ppc_rma >= 2) {
1061 return current_size;
1062 }
1063 return MIN(current_size,
1064 getrampagesize() << (hash_shift - 7));
1065 }
1066 #endif
1067
1068 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1069 {
1070 struct kvm_create_spapr_tce args = {
1071 .liobn = liobn,
1072 .window_size = window_size,
1073 };
1074 long len;
1075 int fd;
1076 void *table;
1077
1078 /* Must set fd to -1 so we don't try to munmap when called for
1079 * destroying the table, which the upper layers -will- do
1080 */
1081 *pfd = -1;
1082 if (!cap_spapr_tce) {
1083 return NULL;
1084 }
1085
1086 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1087 if (fd < 0) {
1088 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1089 liobn);
1090 return NULL;
1091 }
1092
1093 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1094 /* FIXME: round this up to page size */
1095
1096 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1097 if (table == MAP_FAILED) {
1098 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1099 liobn);
1100 close(fd);
1101 return NULL;
1102 }
1103
1104 *pfd = fd;
1105 return table;
1106 }
1107
1108 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1109 {
1110 long len;
1111
1112 if (fd < 0) {
1113 return -1;
1114 }
1115
1116 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
1117 if ((munmap(table, len) < 0) ||
1118 (close(fd) < 0)) {
1119 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1120 strerror(errno));
1121 /* Leak the table */
1122 }
1123
1124 return 0;
1125 }
1126
1127 int kvmppc_reset_htab(int shift_hint)
1128 {
1129 uint32_t shift = shift_hint;
1130
1131 if (!kvm_enabled()) {
1132 /* Full emulation, tell caller to allocate htab itself */
1133 return 0;
1134 }
1135 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1136 int ret;
1137 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1138 if (ret == -ENOTTY) {
1139 /* At least some versions of PR KVM advertise the
1140 * capability, but don't implement the ioctl(). Oops.
1141 * Return 0 so that we allocate the htab in qemu, as is
1142 * correct for PR. */
1143 return 0;
1144 } else if (ret < 0) {
1145 return ret;
1146 }
1147 return shift;
1148 }
1149
1150 /* We have a kernel that predates the htab reset calls. For PR
1151 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1152 * this era, it has allocated a 16MB fixed size hash table
1153 * already. Kernels of this era have the GET_PVINFO capability
1154 * only on PR, so we use this hack to determine the right
1155 * answer */
1156 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1157 /* PR - tell caller to allocate htab */
1158 return 0;
1159 } else {
1160 /* HV - assume 16MB kernel allocated htab */
1161 return 24;
1162 }
1163 }
1164
1165 static inline uint32_t mfpvr(void)
1166 {
1167 uint32_t pvr;
1168
1169 asm ("mfpvr %0"
1170 : "=r"(pvr));
1171 return pvr;
1172 }
1173
/*
 * Set (on == true) or clear (on == false) the bits given by @flags in
 * the mask pointed to by @word.
 */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
1182
1183 const ppc_def_t *kvmppc_host_cpu_def(void)
1184 {
1185 uint32_t host_pvr = mfpvr();
1186 const ppc_def_t *base_spec;
1187 ppc_def_t *spec;
1188 uint32_t vmx = kvmppc_get_vmx();
1189 uint32_t dfp = kvmppc_get_dfp();
1190
1191 base_spec = ppc_find_by_pvr(host_pvr);
1192
1193 spec = g_malloc0(sizeof(*spec));
1194 memcpy(spec, base_spec, sizeof(*spec));
1195
1196 /* Now fix up the spec with information we can query from the host */
1197
1198 if (vmx != -1) {
1199 /* Only override when we know what the host supports */
1200 alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
1201 alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
1202 }
1203 if (dfp != -1) {
1204 /* Only override when we know what the host supports */
1205 alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
1206 }
1207
1208 return spec;
1209 }
1210
1211 int kvmppc_fixup_cpu(CPUPPCState *env)
1212 {
1213 int smt;
1214
1215 /* Adjust cpu index for SMT */
1216 smt = kvmppc_smt_threads();
1217 env->cpu_index = (env->cpu_index / smp_threads) * smt
1218 + (env->cpu_index % smp_threads);
1219
1220 return 0;
1221 }
1222
1223
1224 bool kvm_arch_stop_on_emulation_error(CPUPPCState *env)
1225 {
1226 return true;
1227 }
1228
1229 int kvm_arch_on_sigbus_vcpu(CPUPPCState *env, int code, void *addr)
1230 {
1231 return 1;
1232 }
1233
/* Global SIGBUS hook: no PowerPC specific handling, always returns 1. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}