]> git.proxmox.com Git - qemu.git/blob - target-ppc/kvm.c
Merge remote-tracking branch 'spice/spice.v58' into staging
[qemu.git] / target-ppc / kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
22
23 #include <linux/kvm.h>
24
25 #include "qemu-common.h"
26 #include "qemu-timer.h"
27 #include "sysemu.h"
28 #include "kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "cpus.h"
32 #include "device_tree.h"
33 #include "hw/sysbus.h"
34 #include "hw/spapr.h"
35
36 #include "hw/sysbus.h"
37 #include "hw/spapr.h"
38 #include "hw/spapr_vio.h"
39
40 //#define DEBUG_KVM
41
42 #ifdef DEBUG_KVM
43 #define dprintf(fmt, ...) \
44 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
45 #else
46 #define dprintf(fmt, ...) \
47 do { } while (0)
48 #endif
49
50 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
51
52 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
53 KVM_CAP_LAST_INFO
54 };
55
/* Results of the KVM capability probes performed in kvm_arch_init().
 * File-scope statics start out as zero, i.e. "capability absent". */
static int cap_interrupt_unset;
static int cap_interrupt_level;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
63
64 /* XXX We have a race condition where we actually have a level triggered
65 * interrupt, but the infrastructure can't expose that yet, so the guest
66 * takes but ignores it, goes to sleep and never gets notified that there's
67 * still an interrupt pending.
68 *
69 * As a quick workaround, let's just wake up again 20 ms after we injected
70 * an interrupt. That way we can assure that we're always reinjecting
71 * interrupts in case the guest swallowed them.
72 */
73 static QEMUTimer *idle_timer;
74
/* Callback taking an opaque pointer: kicks the CPU it is handed. */
static void kvm_kick_env(void *env)
{
    void *cpu = env;

    qemu_cpu_kick(cpu);
}
79
80 int kvm_arch_init(KVMState *s)
81 {
82 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
83 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
84 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
85 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
86 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
87 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
88 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
89
90 if (!cap_interrupt_level) {
91 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
92 "VM to stall at times!\n");
93 }
94
95 return 0;
96 }
97
98 static int kvm_arch_sync_sregs(CPUPPCState *cenv)
99 {
100 struct kvm_sregs sregs;
101 int ret;
102
103 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
104 /* What we're really trying to say is "if we're on BookE, we use
105 the native PVR for now". This is the only sane way to check
106 it though, so we potentially confuse users that they can run
107 BookE guests on BookS. Let's hope nobody dares enough :) */
108 return 0;
109 } else {
110 if (!cap_segstate) {
111 fprintf(stderr, "kvm error: missing PVR setting capability\n");
112 return -ENOSYS;
113 }
114 }
115
116 ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
117 if (ret) {
118 return ret;
119 }
120
121 sregs.pvr = cenv->spr[SPR_PVR];
122 return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
123 }
124
125 /* Set up a shared TLB array with KVM */
126 static int kvm_booke206_tlb_init(CPUPPCState *env)
127 {
128 struct kvm_book3e_206_tlb_params params = {};
129 struct kvm_config_tlb cfg = {};
130 struct kvm_enable_cap encap = {};
131 unsigned int entries = 0;
132 int ret, i;
133
134 if (!kvm_enabled() ||
135 !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
136 return 0;
137 }
138
139 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
140
141 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
142 params.tlb_sizes[i] = booke206_tlb_size(env, i);
143 params.tlb_ways[i] = booke206_tlb_ways(env, i);
144 entries += params.tlb_sizes[i];
145 }
146
147 assert(entries == env->nb_tlb);
148 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
149
150 env->tlb_dirty = true;
151
152 cfg.array = (uintptr_t)env->tlb.tlbm;
153 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
154 cfg.params = (uintptr_t)&params;
155 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
156
157 encap.cap = KVM_CAP_SW_TLB;
158 encap.args[0] = (uintptr_t)&cfg;
159
160 ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &encap);
161 if (ret < 0) {
162 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
163 __func__, strerror(-ret));
164 return ret;
165 }
166
167 env->kvm_sw_tlb = true;
168 return 0;
169 }
170
171
172 #if defined(TARGET_PPC64)
173 static void kvm_get_fallback_smmu_info(CPUPPCState *env,
174 struct kvm_ppc_smmu_info *info)
175 {
176 memset(info, 0, sizeof(*info));
177
178 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
179 * need to "guess" what the supported page sizes are.
180 *
181 * For that to work we make a few assumptions:
182 *
183 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
184 * KVM which only supports 4K and 16M pages, but supports them
185 * regardless of the backing store characteritics. We also don't
186 * support 1T segments.
187 *
188 * This is safe as if HV KVM ever supports that capability or PR
189 * KVM grows supports for more page/segment sizes, those versions
190 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
191 * will not hit this fallback
192 *
193 * - Else we are running HV KVM. This means we only support page
194 * sizes that fit in the backing store. Additionally we only
195 * advertize 64K pages if the processor is ARCH 2.06 and we assume
196 * P7 encodings for the SLB and hash table. Here too, we assume
197 * support for any newer processor will mean a kernel that
198 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
199 * this fallback.
200 */
201 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
202 /* No flags */
203 info->flags = 0;
204 info->slb_size = 64;
205
206 /* Standard 4k base page size segment */
207 info->sps[0].page_shift = 12;
208 info->sps[0].slb_enc = 0;
209 info->sps[0].enc[0].page_shift = 12;
210 info->sps[0].enc[0].pte_enc = 0;
211
212 /* Standard 16M large page size segment */
213 info->sps[1].page_shift = 24;
214 info->sps[1].slb_enc = SLB_VSID_L;
215 info->sps[1].enc[0].page_shift = 24;
216 info->sps[1].enc[0].pte_enc = 0;
217 } else {
218 int i = 0;
219
220 /* HV KVM has backing store size restrictions */
221 info->flags = KVM_PPC_PAGE_SIZES_REAL;
222
223 if (env->mmu_model & POWERPC_MMU_1TSEG) {
224 info->flags |= KVM_PPC_1T_SEGMENTS;
225 }
226
227 if (env->mmu_model == POWERPC_MMU_2_06) {
228 info->slb_size = 32;
229 } else {
230 info->slb_size = 64;
231 }
232
233 /* Standard 4k base page size segment */
234 info->sps[i].page_shift = 12;
235 info->sps[i].slb_enc = 0;
236 info->sps[i].enc[0].page_shift = 12;
237 info->sps[i].enc[0].pte_enc = 0;
238 i++;
239
240 /* 64K on MMU 2.06 */
241 if (env->mmu_model == POWERPC_MMU_2_06) {
242 info->sps[i].page_shift = 16;
243 info->sps[i].slb_enc = 0x110;
244 info->sps[i].enc[0].page_shift = 16;
245 info->sps[i].enc[0].pte_enc = 1;
246 i++;
247 }
248
249 /* Standard 16M large page size segment */
250 info->sps[i].page_shift = 24;
251 info->sps[i].slb_enc = SLB_VSID_L;
252 info->sps[i].enc[0].page_shift = 24;
253 info->sps[i].enc[0].pte_enc = 0;
254 }
255 }
256
257 static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
258 {
259 int ret;
260
261 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
262 ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
263 if (ret == 0) {
264 return;
265 }
266 }
267
268 kvm_get_fallback_smmu_info(env, info);
269 }
270
271 static long getrampagesize(void)
272 {
273 struct statfs fs;
274 int ret;
275
276 if (!mem_path) {
277 /* guest RAM is backed by normal anonymous pages */
278 return getpagesize();
279 }
280
281 do {
282 ret = statfs(mem_path, &fs);
283 } while (ret != 0 && errno == EINTR);
284
285 if (ret != 0) {
286 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
287 strerror(errno));
288 exit(1);
289 }
290
291 #define HUGETLBFS_MAGIC 0x958458f6
292
293 if (fs.f_type != HUGETLBFS_MAGIC) {
294 /* Explicit mempath, but it's ordinary pages */
295 return getpagesize();
296 }
297
298 /* It's hugepage, return the huge page size */
299 return fs.f_bsize;
300 }
301
302 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
303 {
304 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
305 return true;
306 }
307
308 return (1ul << shift) <= rampgsize;
309 }
310
311 static void kvm_fixup_page_sizes(CPUPPCState *env)
312 {
313 static struct kvm_ppc_smmu_info smmu_info;
314 static bool has_smmu_info;
315 long rampagesize;
316 int iq, ik, jq, jk;
317
318 /* We only handle page sizes for 64-bit server guests for now */
319 if (!(env->mmu_model & POWERPC_MMU_64)) {
320 return;
321 }
322
323 /* Collect MMU info from kernel if not already */
324 if (!has_smmu_info) {
325 kvm_get_smmu_info(env, &smmu_info);
326 has_smmu_info = true;
327 }
328
329 rampagesize = getrampagesize();
330
331 /* Convert to QEMU form */
332 memset(&env->sps, 0, sizeof(env->sps));
333
334 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
335 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
336 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
337
338 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
339 ksps->page_shift)) {
340 continue;
341 }
342 qsps->page_shift = ksps->page_shift;
343 qsps->slb_enc = ksps->slb_enc;
344 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
345 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
346 ksps->enc[jk].page_shift)) {
347 continue;
348 }
349 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
350 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
351 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
352 break;
353 }
354 }
355 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
356 break;
357 }
358 }
359 env->slb_nr = smmu_info.slb_size;
360 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
361 env->mmu_model |= POWERPC_MMU_1TSEG;
362 } else {
363 env->mmu_model &= ~POWERPC_MMU_1TSEG;
364 }
365 }
366 #else /* defined (TARGET_PPC64) */
367
368 static inline void kvm_fixup_page_sizes(CPUPPCState *env)
369 {
370 }
371
372 #endif /* !defined (TARGET_PPC64) */
373
374 int kvm_arch_init_vcpu(CPUPPCState *cenv)
375 {
376 int ret;
377
378 /* Gather server mmu info from KVM and update the CPU state */
379 kvm_fixup_page_sizes(cenv);
380
381 /* Synchronize sregs with kvm */
382 ret = kvm_arch_sync_sregs(cenv);
383 if (ret) {
384 return ret;
385 }
386
387 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_env, cenv);
388
389 /* Some targets support access to KVM's guest TLB. */
390 switch (cenv->mmu_model) {
391 case POWERPC_MMU_BOOKE206:
392 ret = kvm_booke206_tlb_init(cenv);
393 break;
394 default:
395 break;
396 }
397
398 return ret;
399 }
400
401 void kvm_arch_reset_vcpu(CPUPPCState *env)
402 {
403 }
404
405 static void kvm_sw_tlb_put(CPUPPCState *env)
406 {
407 struct kvm_dirty_tlb dirty_tlb;
408 unsigned char *bitmap;
409 int ret;
410
411 if (!env->kvm_sw_tlb) {
412 return;
413 }
414
415 bitmap = g_malloc((env->nb_tlb + 7) / 8);
416 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
417
418 dirty_tlb.bitmap = (uintptr_t)bitmap;
419 dirty_tlb.num_dirty = env->nb_tlb;
420
421 ret = kvm_vcpu_ioctl(env, KVM_DIRTY_TLB, &dirty_tlb);
422 if (ret) {
423 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
424 __func__, strerror(-ret));
425 }
426
427 g_free(bitmap);
428 }
429
430 int kvm_arch_put_registers(CPUPPCState *env, int level)
431 {
432 struct kvm_regs regs;
433 int ret;
434 int i;
435
436 ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
437 if (ret < 0)
438 return ret;
439
440 regs.ctr = env->ctr;
441 regs.lr = env->lr;
442 regs.xer = env->xer;
443 regs.msr = env->msr;
444 regs.pc = env->nip;
445
446 regs.srr0 = env->spr[SPR_SRR0];
447 regs.srr1 = env->spr[SPR_SRR1];
448
449 regs.sprg0 = env->spr[SPR_SPRG0];
450 regs.sprg1 = env->spr[SPR_SPRG1];
451 regs.sprg2 = env->spr[SPR_SPRG2];
452 regs.sprg3 = env->spr[SPR_SPRG3];
453 regs.sprg4 = env->spr[SPR_SPRG4];
454 regs.sprg5 = env->spr[SPR_SPRG5];
455 regs.sprg6 = env->spr[SPR_SPRG6];
456 regs.sprg7 = env->spr[SPR_SPRG7];
457
458 regs.pid = env->spr[SPR_BOOKE_PID];
459
460 for (i = 0;i < 32; i++)
461 regs.gpr[i] = env->gpr[i];
462
463 ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
464 if (ret < 0)
465 return ret;
466
467 if (env->tlb_dirty) {
468 kvm_sw_tlb_put(env);
469 env->tlb_dirty = false;
470 }
471
472 return ret;
473 }
474
475 int kvm_arch_get_registers(CPUPPCState *env)
476 {
477 struct kvm_regs regs;
478 struct kvm_sregs sregs;
479 uint32_t cr;
480 int i, ret;
481
482 ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
483 if (ret < 0)
484 return ret;
485
486 cr = regs.cr;
487 for (i = 7; i >= 0; i--) {
488 env->crf[i] = cr & 15;
489 cr >>= 4;
490 }
491
492 env->ctr = regs.ctr;
493 env->lr = regs.lr;
494 env->xer = regs.xer;
495 env->msr = regs.msr;
496 env->nip = regs.pc;
497
498 env->spr[SPR_SRR0] = regs.srr0;
499 env->spr[SPR_SRR1] = regs.srr1;
500
501 env->spr[SPR_SPRG0] = regs.sprg0;
502 env->spr[SPR_SPRG1] = regs.sprg1;
503 env->spr[SPR_SPRG2] = regs.sprg2;
504 env->spr[SPR_SPRG3] = regs.sprg3;
505 env->spr[SPR_SPRG4] = regs.sprg4;
506 env->spr[SPR_SPRG5] = regs.sprg5;
507 env->spr[SPR_SPRG6] = regs.sprg6;
508 env->spr[SPR_SPRG7] = regs.sprg7;
509
510 env->spr[SPR_BOOKE_PID] = regs.pid;
511
512 for (i = 0;i < 32; i++)
513 env->gpr[i] = regs.gpr[i];
514
515 if (cap_booke_sregs) {
516 ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
517 if (ret < 0) {
518 return ret;
519 }
520
521 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
522 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
523 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
524 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
525 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
526 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
527 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
528 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
529 env->spr[SPR_DECR] = sregs.u.e.dec;
530 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
531 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
532 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
533 }
534
535 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
536 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
537 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
538 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
539 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
540 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
541 }
542
543 if (sregs.u.e.features & KVM_SREGS_E_64) {
544 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
545 }
546
547 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
548 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
549 }
550
551 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
552 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
553 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
554 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
555 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
556 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
557 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
558 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
559 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
560 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
561 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
562 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
563 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
564 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
565 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
566 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
567 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
568
569 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
570 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
571 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
572 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
573 }
574
575 if (sregs.u.e.features & KVM_SREGS_E_PM) {
576 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
577 }
578
579 if (sregs.u.e.features & KVM_SREGS_E_PC) {
580 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
581 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
582 }
583 }
584
585 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
586 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
587 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
588 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
589 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
590 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
591 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
592 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
593 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
594 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
595 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
596 }
597
598 if (sregs.u.e.features & KVM_SREGS_EXP) {
599 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
600 }
601
602 if (sregs.u.e.features & KVM_SREGS_E_PD) {
603 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
604 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
605 }
606
607 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
608 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
609 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
610 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
611
612 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
613 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
614 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
615 }
616 }
617 }
618
619 if (cap_segstate) {
620 ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
621 if (ret < 0) {
622 return ret;
623 }
624
625 ppc_store_sdr1(env, sregs.u.s.sdr1);
626
627 /* Sync SLB */
628 #ifdef TARGET_PPC64
629 for (i = 0; i < 64; i++) {
630 ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
631 sregs.u.s.ppc64.slb[i].slbv);
632 }
633 #endif
634
635 /* Sync SRs */
636 for (i = 0; i < 16; i++) {
637 env->sr[i] = sregs.u.s.ppc32.sr[i];
638 }
639
640 /* Sync BATs */
641 for (i = 0; i < 8; i++) {
642 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
643 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
644 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
645 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
646 }
647 }
648
649 return 0;
650 }
651
652 int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
653 {
654 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
655
656 if (irq != PPC_INTERRUPT_EXT) {
657 return 0;
658 }
659
660 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
661 return 0;
662 }
663
664 kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);
665
666 return 0;
667 }
668
669 #if defined(TARGET_PPCEMB)
670 #define PPC_INPUT_INT PPC40x_INPUT_INT
671 #elif defined(TARGET_PPC64)
672 #define PPC_INPUT_INT PPC970_INPUT_INT
673 #else
674 #define PPC_INPUT_INT PPC6xx_INPUT_INT
675 #endif
676
677 void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
678 {
679 int r;
680 unsigned irq;
681
682 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
683 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
684 if (!cap_interrupt_level &&
685 run->ready_for_interrupt_injection &&
686 (env->interrupt_request & CPU_INTERRUPT_HARD) &&
687 (env->irq_input_state & (1<<PPC_INPUT_INT)))
688 {
689 /* For now KVM disregards the 'irq' argument. However, in the
690 * future KVM could cache it in-kernel to avoid a heavyweight exit
691 * when reading the UIC.
692 */
693 irq = KVM_INTERRUPT_SET;
694
695 dprintf("injected interrupt %d\n", irq);
696 r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
697 if (r < 0)
698 printf("cpu %d fail inject %x\n", env->cpu_index, irq);
699
700 /* Always wake up soon in case the interrupt was level based */
701 qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
702 (get_ticks_per_sec() / 50));
703 }
704
705 /* We don't know if there are more interrupts pending after this. However,
706 * the guest will return to userspace in the course of handling this one
707 * anyways, so we will get a chance to deliver the rest. */
708 }
709
710 void kvm_arch_post_run(CPUPPCState *env, struct kvm_run *run)
711 {
712 }
713
714 int kvm_arch_process_async_events(CPUPPCState *env)
715 {
716 return env->halted;
717 }
718
719 static int kvmppc_handle_halt(CPUPPCState *env)
720 {
721 if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
722 env->halted = 1;
723 env->exception_index = EXCP_HLT;
724 }
725
726 return 0;
727 }
728
729 /* map dcr access to existing qemu dcr emulation */
730 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
731 {
732 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
733 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
734
735 return 0;
736 }
737
738 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
739 {
740 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
741 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
742
743 return 0;
744 }
745
746 int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run)
747 {
748 int ret;
749
750 switch (run->exit_reason) {
751 case KVM_EXIT_DCR:
752 if (run->dcr.is_write) {
753 dprintf("handle dcr write\n");
754 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
755 } else {
756 dprintf("handle dcr read\n");
757 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
758 }
759 break;
760 case KVM_EXIT_HLT:
761 dprintf("handle halt\n");
762 ret = kvmppc_handle_halt(env);
763 break;
764 #ifdef CONFIG_PSERIES
765 case KVM_EXIT_PAPR_HCALL:
766 dprintf("handle PAPR hypercall\n");
767 run->papr_hcall.ret = spapr_hypercall(env, run->papr_hcall.nr,
768 run->papr_hcall.args);
769 ret = 1;
770 break;
771 #endif
772 default:
773 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
774 ret = -1;
775 break;
776 }
777
778 return ret;
779 }
780
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy
 * it (the whole line, including the field name) into @value.
 *
 * @value receives at most @len - 1 characters and is always
 * NUL-terminated on success.
 *
 * Returns 0 if a matching line was found, -1 if the file could not be
 * opened, no line matched, or @len is not positive.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    size_t field_len = strlen(field);
    char line[512];

    /* A non-positive length cannot hold even the terminator */
    if (len <= 0) {
        return -1;
    }

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    while (fgets(line, sizeof(line), f)) {
        if (!strncmp(line, field, field_len)) {
            /* strncpy() alone does not terminate when the source is at
             * least as long as the destination, so do it explicitly. */
            strncpy(value, line, (size_t)len - 1);
            value[len - 1] = '\0';
            ret = 0;
            break;
        }
    }

    fclose(f);

    return ret;
}
808
/*
 * Return the host timebase frequency parsed from the "timebase" line of
 * /proc/cpuinfo.
 *
 * Falls back to get_ticks_per_sec() when the line is missing, has no
 * ':' separator, or the value after it is not a number that fits in
 * 32 bits.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    char *end;
    unsigned long parsed;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    ns = strchr(line, ':');
    if (!ns) {
        return retval;
    }

    ns++;

    /* strtoul() instead of atoi(): detects "no digits" and overflow
     * rather than silently producing 0 or an undefined result. */
    errno = 0;
    parsed = strtoul(ns, &end, 10);
    if (end == ns || errno == ERANGE || parsed > UINT32_MAX) {
        return retval;
    }

    retval = parsed;
    return retval;
}
828
829 /* Try to find a device tree node for a CPU with clock-frequency property */
830 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
831 {
832 struct dirent *dirp;
833 DIR *dp;
834
835 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
836 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
837 return -1;
838 }
839
840 buf[0] = '\0';
841 while ((dirp = readdir(dp)) != NULL) {
842 FILE *f;
843 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
844 dirp->d_name);
845 f = fopen(buf, "r");
846 if (f) {
847 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
848 fclose(f);
849 break;
850 }
851 buf[0] = '\0';
852 }
853 closedir(dp);
854 if (buf[0] == '\0') {
855 printf("Unknown host!\n");
856 return -1;
857 }
858
859 return 0;
860 }
861
862 /* Read a CPU node property from the host device tree that's a single
863 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
864 * (can't find or open the property, or doesn't understand the
865 * format) */
866 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
867 {
868 char buf[PATH_MAX];
869 union {
870 uint32_t v32;
871 uint64_t v64;
872 } u;
873 FILE *f;
874 int len;
875
876 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
877 return -1;
878 }
879
880 strncat(buf, "/", sizeof(buf) - strlen(buf));
881 strncat(buf, propname, sizeof(buf) - strlen(buf));
882
883 f = fopen(buf, "rb");
884 if (!f) {
885 return -1;
886 }
887
888 len = fread(&u, 1, sizeof(u), f);
889 fclose(f);
890 switch (len) {
891 case 4:
892 /* property is a 32-bit quantity */
893 return be32_to_cpu(u.v32);
894 case 8:
895 return be64_to_cpu(u.v64);
896 }
897
898 return 0;
899 }
900
/* Value of the host CPU node's "clock-frequency" device tree property,
 * as returned by kvmppc_read_int_cpu_dt(). */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
905
/* Value of the host CPU node's "ibm,vmx" device tree property,
 * truncated to 32 bits. */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,vmx");

    return prop;
}
910
/* Value of the host CPU node's "ibm,dfp" device tree property,
 * truncated to 32 bits. */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,dfp");

    return prop;
}
915
916 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
917 {
918 uint32_t *hc = (uint32_t*)buf;
919
920 struct kvm_ppc_pvinfo pvinfo;
921
922 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
923 !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
924 memcpy(buf, pvinfo.hcall, buf_len);
925
926 return 0;
927 }
928
929 /*
930 * Fallback to always fail hypercalls:
931 *
932 * li r3, -1
933 * nop
934 * nop
935 * nop
936 */
937
938 hc[0] = 0x3860ffff;
939 hc[1] = 0x60000000;
940 hc[2] = 0x60000000;
941 hc[3] = 0x60000000;
942
943 return 0;
944 }
945
946 void kvmppc_set_papr(CPUPPCState *env)
947 {
948 struct kvm_enable_cap cap = {};
949 struct kvm_one_reg reg = {};
950 struct kvm_sregs sregs = {};
951 int ret;
952 uint64_t hior = env->spr[SPR_HIOR];
953
954 cap.cap = KVM_CAP_PPC_PAPR;
955 ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);
956
957 if (ret) {
958 goto fail;
959 }
960
961 /*
962 * XXX We set HIOR here. It really should be a qdev property of
963 * the CPU node, but we don't have CPUs converted to qdev yet.
964 *
965 * Once we have qdev CPUs, move HIOR to a qdev property and
966 * remove this chunk.
967 */
968 reg.id = KVM_REG_PPC_HIOR;
969 reg.addr = (uintptr_t)&hior;
970 ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
971 if (ret) {
972 fprintf(stderr, "Couldn't set HIOR. Maybe you're running an old \n"
973 "kernel with support for HV KVM but no PAPR PR \n"
974 "KVM in which case things will work. If they don't \n"
975 "please update your host kernel!\n");
976 }
977
978 /* Set SDR1 so kernel space finds the HTAB */
979 ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
980 if (ret) {
981 goto fail;
982 }
983
984 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
985
986 ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
987 if (ret) {
988 goto fail;
989 }
990
991 return;
992
993 fail:
994 cpu_abort(env, "This KVM version does not support PAPR\n");
995 }
996
997 int kvmppc_smt_threads(void)
998 {
999 return cap_ppc_smt ? cap_ppc_smt : 1;
1000 }
1001
1002 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1003 {
1004 void *rma;
1005 off_t size;
1006 int fd;
1007 struct kvm_allocate_rma ret;
1008 MemoryRegion *rma_region;
1009
1010 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1011 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1012 * not necessary on this hardware
1013 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1014 *
1015 * FIXME: We should allow the user to force contiguous RMA
1016 * allocation in the cap_ppc_rma==1 case.
1017 */
1018 if (cap_ppc_rma < 2) {
1019 return 0;
1020 }
1021
1022 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1023 if (fd < 0) {
1024 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1025 strerror(errno));
1026 return -1;
1027 }
1028
1029 size = MIN(ret.rma_size, 256ul << 20);
1030
1031 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1032 if (rma == MAP_FAILED) {
1033 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1034 return -1;
1035 };
1036
1037 rma_region = g_new(MemoryRegion, 1);
1038 memory_region_init_ram_ptr(rma_region, name, size, rma);
1039 vmstate_register_ram_global(rma_region);
1040 memory_region_add_subregion(sysmem, 0, rma_region);
1041
1042 return size;
1043 }
1044
1045 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1046 {
1047 struct kvm_create_spapr_tce args = {
1048 .liobn = liobn,
1049 .window_size = window_size,
1050 };
1051 long len;
1052 int fd;
1053 void *table;
1054
1055 /* Must set fd to -1 so we don't try to munmap when called for
1056 * destroying the table, which the upper layers -will- do
1057 */
1058 *pfd = -1;
1059 if (!cap_spapr_tce) {
1060 return NULL;
1061 }
1062
1063 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1064 if (fd < 0) {
1065 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1066 liobn);
1067 return NULL;
1068 }
1069
1070 len = (window_size / SPAPR_VIO_TCE_PAGE_SIZE) * sizeof(VIOsPAPR_RTCE);
1071 /* FIXME: round this up to page size */
1072
1073 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1074 if (table == MAP_FAILED) {
1075 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1076 liobn);
1077 close(fd);
1078 return NULL;
1079 }
1080
1081 *pfd = fd;
1082 return table;
1083 }
1084
1085 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1086 {
1087 long len;
1088
1089 if (fd < 0) {
1090 return -1;
1091 }
1092
1093 len = (window_size / SPAPR_VIO_TCE_PAGE_SIZE)*sizeof(VIOsPAPR_RTCE);
1094 if ((munmap(table, len) < 0) ||
1095 (close(fd) < 0)) {
1096 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1097 strerror(errno));
1098 /* Leak the table */
1099 }
1100
1101 return 0;
1102 }
1103
1104 static inline uint32_t mfpvr(void)
1105 {
1106 uint32_t pvr;
1107
1108 asm ("mfpvr %0"
1109 : "=r"(pvr));
1110 return pvr;
1111 }
1112
/* Set (on == true) or clear (on == false) the bits of @flags in *@word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
1121
1122 const ppc_def_t *kvmppc_host_cpu_def(void)
1123 {
1124 uint32_t host_pvr = mfpvr();
1125 const ppc_def_t *base_spec;
1126 ppc_def_t *spec;
1127 uint32_t vmx = kvmppc_get_vmx();
1128 uint32_t dfp = kvmppc_get_dfp();
1129
1130 base_spec = ppc_find_by_pvr(host_pvr);
1131
1132 spec = g_malloc0(sizeof(*spec));
1133 memcpy(spec, base_spec, sizeof(*spec));
1134
1135 /* Now fix up the spec with information we can query from the host */
1136
1137 if (vmx != -1) {
1138 /* Only override when we know what the host supports */
1139 alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
1140 alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
1141 }
1142 if (dfp != -1) {
1143 /* Only override when we know what the host supports */
1144 alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
1145 }
1146
1147 return spec;
1148 }
1149
1150 int kvmppc_fixup_cpu(CPUPPCState *env)
1151 {
1152 int smt;
1153
1154 /* Adjust cpu index for SMT */
1155 smt = kvmppc_smt_threads();
1156 env->cpu_index = (env->cpu_index / smp_threads) * smt
1157 + (env->cpu_index % smp_threads);
1158
1159 return 0;
1160 }
1161
1162
1163 bool kvm_arch_stop_on_emulation_error(CPUPPCState *env)
1164 {
1165 return true;
1166 }
1167
1168 int kvm_arch_on_sigbus_vcpu(CPUPPCState *env, int code, void *addr)
1169 {
1170 return 1;
1171 }
1172
/* SIGBUS hook: does nothing and always returns 1. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    (void)code;
    (void)addr;

    return 1;
}
1176 }