1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
22
23 #include <linux/kvm.h>
24
25 #include "qemu-common.h"
26 #include "qemu-timer.h"
27 #include "sysemu.h"
28 #include "kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "cpus.h"
32 #include "device_tree.h"
33 #include "hw/sysbus.h"
34 #include "hw/spapr.h"
35
36 #include "hw/sysbus.h"
37 #include "hw/spapr.h"
38 #include "hw/spapr_vio.h"
39
40 //#define DEBUG_KVM
41
42 #ifdef DEBUG_KVM
43 #define dprintf(fmt, ...) \
44 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
45 #else
46 #define dprintf(fmt, ...) \
47 do { } while (0)
48 #endif
49
50 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
51
52 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
53 KVM_CAP_LAST_INFO
54 };
55
56 static int cap_interrupt_unset = false;
57 static int cap_interrupt_level = false;
58 static int cap_segstate;
59 static int cap_booke_sregs;
60 static int cap_ppc_smt;
61 static int cap_ppc_rma;
62 static int cap_spapr_tce;
63 static int cap_hior;
64
65 /* XXX We have a race condition where we actually have a level triggered
66 * interrupt, but the infrastructure can't expose that yet, so the guest
67 * takes the interrupt but ignores it, goes to sleep and never gets notified that there's
68 * still an interrupt pending.
69 *
70 * As a quick workaround, let's just wake up again 20 ms after we injected
71 * an interrupt. That way we can ensure that we're always reinjecting
72 * interrupts in case the guest swallowed them.
73 */
74 static QEMUTimer *idle_timer;
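/* For illustration: the timer declared here is re-armed from kvm_arch_pre_run()
 * further down with
 *
 *     qemu_mod_timer(idle_timer,
 *                    qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 50);
 *
 * and get_ticks_per_sec() / 50 is 20,000,000 ns, which is where the "20 ms"
 * in the comment above comes from.
 */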
75
76 static void kvm_kick_cpu(void *opaque)
77 {
78 PowerPCCPU *cpu = opaque;
79
80 qemu_cpu_kick(CPU(cpu));
81 }
82
83 int kvm_arch_init(KVMState *s)
84 {
85 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
86 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
87 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
88 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
89 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
90 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
91 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
92 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
93
94 if (!cap_interrupt_level) {
95 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
96 "VM to stall at times!\n");
97 }
98
99 return 0;
100 }
101
102 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
103 {
104 CPUPPCState *cenv = &cpu->env;
105 CPUState *cs = CPU(cpu);
106 struct kvm_sregs sregs;
107 int ret;
108
109 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
110 /* What we're really trying to say is "if we're on BookE, we use
111 the native PVR for now". This is the only sane way to check
112 it though, so we potentially mislead users into thinking they can run
113 BookE guests on Book S. Let's hope nobody dares to try :) */
114 return 0;
115 } else {
116 if (!cap_segstate) {
117 fprintf(stderr, "kvm error: missing PVR setting capability\n");
118 return -ENOSYS;
119 }
120 }
121
122 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
123 if (ret) {
124 return ret;
125 }
126
127 sregs.pvr = cenv->spr[SPR_PVR];
128 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
129 }
130
131 /* Set up a shared TLB array with KVM */
132 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
133 {
134 CPUPPCState *env = &cpu->env;
135 CPUState *cs = CPU(cpu);
136 struct kvm_book3e_206_tlb_params params = {};
137 struct kvm_config_tlb cfg = {};
138 struct kvm_enable_cap encap = {};
139 unsigned int entries = 0;
140 int ret, i;
141
142 if (!kvm_enabled() ||
143 !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
144 return 0;
145 }
146
147 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
148
149 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
150 params.tlb_sizes[i] = booke206_tlb_size(env, i);
151 params.tlb_ways[i] = booke206_tlb_ways(env, i);
152 entries += params.tlb_sizes[i];
153 }
154
155 assert(entries == env->nb_tlb);
156 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
157
158 env->tlb_dirty = true;
159
160 cfg.array = (uintptr_t)env->tlb.tlbm;
161 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
162 cfg.params = (uintptr_t)&params;
163 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
164
165 encap.cap = KVM_CAP_SW_TLB;
166 encap.args[0] = (uintptr_t)&cfg;
167
168 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
169 if (ret < 0) {
170 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
171 __func__, strerror(-ret));
172 return ret;
173 }
174
175 env->kvm_sw_tlb = true;
176 return 0;
177 }
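/* For illustration, on an e500mc-style MMU (values assumed here; the real ones
 * come from booke206_tlb_size()/booke206_tlb_ways()): TLB0 with 512 entries in
 * 4 ways plus TLB1 with 64 fully-associative entries gives
 * entries = 512 + 64 = 576, so the array handed to KVM via cfg.array is
 * 576 * sizeof(ppcmas_tlb_t) bytes of env->tlb.tlbm, and from then on both
 * QEMU and the kernel update TLB entries in that shared memory in place.
 */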
178
179
180 #if defined(TARGET_PPC64)
181 static void kvm_get_fallback_smmu_info(CPUPPCState *env,
182 struct kvm_ppc_smmu_info *info)
183 {
184 memset(info, 0, sizeof(*info));
185
186 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
187 * need to "guess" what the supported page sizes are.
188 *
189 * For that to work we make a few assumptions:
190 *
191 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
192 * KVM which only supports 4K and 16M pages, but supports them
193 * regardless of the backing store characteristics. We also don't
194 * support 1T segments.
195 *
196 * This is safe because if HV KVM ever supports that capability, or PR
197 * KVM grows support for more page/segment sizes, those versions
198 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
199 * will not hit this fallback.
200 *
201 * - Else we are running HV KVM. This means we only support page
202 * sizes that fit in the backing store. Additionally we only
203 * advertise 64K pages if the processor is ARCH 2.06 and we assume
204 * P7 encodings for the SLB and hash table. Here too, we assume
205 * support for any newer processor will mean a kernel that
206 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
207 * this fallback.
208 */
209 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
210 /* No flags */
211 info->flags = 0;
212 info->slb_size = 64;
213
214 /* Standard 4k base page size segment */
215 info->sps[0].page_shift = 12;
216 info->sps[0].slb_enc = 0;
217 info->sps[0].enc[0].page_shift = 12;
218 info->sps[0].enc[0].pte_enc = 0;
219
220 /* Standard 16M large page size segment */
221 info->sps[1].page_shift = 24;
222 info->sps[1].slb_enc = SLB_VSID_L;
223 info->sps[1].enc[0].page_shift = 24;
224 info->sps[1].enc[0].pte_enc = 0;
225 } else {
226 int i = 0;
227
228 /* HV KVM has backing store size restrictions */
229 info->flags = KVM_PPC_PAGE_SIZES_REAL;
230
231 if (env->mmu_model & POWERPC_MMU_1TSEG) {
232 info->flags |= KVM_PPC_1T_SEGMENTS;
233 }
234
235 if (env->mmu_model == POWERPC_MMU_2_06) {
236 info->slb_size = 32;
237 } else {
238 info->slb_size = 64;
239 }
240
241 /* Standard 4k base page size segment */
242 info->sps[i].page_shift = 12;
243 info->sps[i].slb_enc = 0;
244 info->sps[i].enc[0].page_shift = 12;
245 info->sps[i].enc[0].pte_enc = 0;
246 i++;
247
248 /* 64K on MMU 2.06 */
249 if (env->mmu_model == POWERPC_MMU_2_06) {
250 info->sps[i].page_shift = 16;
251 info->sps[i].slb_enc = 0x110;
252 info->sps[i].enc[0].page_shift = 16;
253 info->sps[i].enc[0].pte_enc = 1;
254 i++;
255 }
256
257 /* Standard 16M large page size segment */
258 info->sps[i].page_shift = 24;
259 info->sps[i].slb_enc = SLB_VSID_L;
260 info->sps[i].enc[0].page_shift = 24;
261 info->sps[i].enc[0].pte_enc = 0;
262 }
263 }
264
265 static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
266 {
267 int ret;
268
269 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
270 ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
271 if (ret == 0) {
272 return;
273 }
274 }
275
276 kvm_get_fallback_smmu_info(env, info);
277 }
278
279 static long getrampagesize(void)
280 {
281 struct statfs fs;
282 int ret;
283
284 if (!mem_path) {
285 /* guest RAM is backed by normal anonymous pages */
286 return getpagesize();
287 }
288
289 do {
290 ret = statfs(mem_path, &fs);
291 } while (ret != 0 && errno == EINTR);
292
293 if (ret != 0) {
294 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
295 strerror(errno));
296 exit(1);
297 }
298
299 #define HUGETLBFS_MAGIC 0x958458f6
300
301 if (fs.f_type != HUGETLBFS_MAGIC) {
302 /* Explicit mempath, but it's ordinary pages */
303 return getpagesize();
304 }
305
306 /* It's a hugetlbfs mount, return the huge page size */
307 return fs.f_bsize;
308 }
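/* For example (setup assumed): with "-mem-path /dev/hugepages" pointing at a
 * hugetlbfs mount configured for 16 MB huge pages, statfs() reports
 * f_type == HUGETLBFS_MAGIC and f_bsize == 16777216, so guest RAM is known to
 * be backed by 16 MB pages; without -mem-path we simply report the normal
 * host page size (typically 4 KB or 64 KB on POWER).
 */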
309
310 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
311 {
312 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
313 return true;
314 }
315
316 return (1ul << shift) <= rampgsize;
317 }
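/* Worked example: with KVM_PPC_PAGE_SIZES_REAL set (HV KVM) and 4 KB backing
 * pages (rampgsize == 4096), a 16 MB segment page size (shift == 24) is
 * rejected because (1ul << 24) > 4096, while a 4 KB size (shift == 12) is
 * accepted. Without the flag (PR KVM) every size passes.
 */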
318
319 static void kvm_fixup_page_sizes(CPUPPCState *env)
320 {
321 static struct kvm_ppc_smmu_info smmu_info;
322 static bool has_smmu_info;
323 long rampagesize;
324 int iq, ik, jq, jk;
325
326 /* We only handle page sizes for 64-bit server guests for now */
327 if (!(env->mmu_model & POWERPC_MMU_64)) {
328 return;
329 }
330
331 /* Collect MMU info from kernel if not already */
332 if (!has_smmu_info) {
333 kvm_get_smmu_info(env, &smmu_info);
334 has_smmu_info = true;
335 }
336
337 rampagesize = getrampagesize();
338
339 /* Convert to QEMU form */
340 memset(&env->sps, 0, sizeof(env->sps));
341
342 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
343 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
344 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
345
346 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
347 ksps->page_shift)) {
348 continue;
349 }
350 qsps->page_shift = ksps->page_shift;
351 qsps->slb_enc = ksps->slb_enc;
352 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
353 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
354 ksps->enc[jk].page_shift)) {
355 continue;
356 }
357 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
358 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
359 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
360 break;
361 }
362 }
363 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
364 break;
365 }
366 }
367 env->slb_nr = smmu_info.slb_size;
368 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
369 env->mmu_model |= POWERPC_MMU_1TSEG;
370 } else {
371 env->mmu_model &= ~POWERPC_MMU_1TSEG;
372 }
373 }
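/* Net effect, for illustration: on HV KVM with anonymous (4 KB) guest RAM,
 * only the 4 KB segment size survives the filtering above, so env->sps ends
 * up advertising 4 KB pages only and large pages are not offered to the
 * guest; back the guest with 16 MB hugetlbfs pages instead and the 16 MB
 * (and, on MMU 2.06, 64 KB) entries are kept as well.
 */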
374 #else /* defined (TARGET_PPC64) */
375
376 static inline void kvm_fixup_page_sizes(CPUPPCState *env)
377 {
378 }
379
380 #endif /* !defined (TARGET_PPC64) */
381
382 int kvm_arch_init_vcpu(CPUState *cs)
383 {
384 PowerPCCPU *cpu = POWERPC_CPU(cs);
385 CPUPPCState *cenv = &cpu->env;
386 int ret;
387
388 /* Gather server mmu info from KVM and update the CPU state */
389 kvm_fixup_page_sizes(cenv);
390
391 /* Synchronize sregs with kvm */
392 ret = kvm_arch_sync_sregs(cpu);
393 if (ret) {
394 return ret;
395 }
396
397 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);
398
399 /* Some targets support access to KVM's guest TLB. */
400 switch (cenv->mmu_model) {
401 case POWERPC_MMU_BOOKE206:
402 ret = kvm_booke206_tlb_init(cpu);
403 break;
404 default:
405 break;
406 }
407
408 return ret;
409 }
410
411 void kvm_arch_reset_vcpu(CPUState *cpu)
412 {
413 }
414
415 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
416 {
417 CPUPPCState *env = &cpu->env;
418 CPUState *cs = CPU(cpu);
419 struct kvm_dirty_tlb dirty_tlb;
420 unsigned char *bitmap;
421 int ret;
422
423 if (!env->kvm_sw_tlb) {
424 return;
425 }
426
427 bitmap = g_malloc((env->nb_tlb + 7) / 8);
428 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
429
430 dirty_tlb.bitmap = (uintptr_t)bitmap;
431 dirty_tlb.num_dirty = env->nb_tlb;
432
433 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
434 if (ret) {
435 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
436 __func__, strerror(-ret));
437 }
438
439 g_free(bitmap);
440 }
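/* For illustration: the bitmap holds one bit per TLB entry, rounded up to a
 * whole byte, so e.g. nb_tlb == 576 entries needs (576 + 7) / 8 == 72 bytes;
 * setting every byte to 0xFF marks all entries dirty, telling KVM to re-read
 * the entire shared array rather than individual entries.
 */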
441
442 int kvm_arch_put_registers(CPUState *cs, int level)
443 {
444 PowerPCCPU *cpu = POWERPC_CPU(cs);
445 CPUPPCState *env = &cpu->env;
446 struct kvm_regs regs;
447 int ret;
448 int i;
449
450 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
451 if (ret < 0) {
452 return ret;
453 }
454
455 regs.ctr = env->ctr;
456 regs.lr = env->lr;
457 regs.xer = env->xer;
458 regs.msr = env->msr;
459 regs.pc = env->nip;
460
461 regs.srr0 = env->spr[SPR_SRR0];
462 regs.srr1 = env->spr[SPR_SRR1];
463
464 regs.sprg0 = env->spr[SPR_SPRG0];
465 regs.sprg1 = env->spr[SPR_SPRG1];
466 regs.sprg2 = env->spr[SPR_SPRG2];
467 regs.sprg3 = env->spr[SPR_SPRG3];
468 regs.sprg4 = env->spr[SPR_SPRG4];
469 regs.sprg5 = env->spr[SPR_SPRG5];
470 regs.sprg6 = env->spr[SPR_SPRG6];
471 regs.sprg7 = env->spr[SPR_SPRG7];
472
473 regs.pid = env->spr[SPR_BOOKE_PID];
474
475 for (i = 0; i < 32; i++)
476 regs.gpr[i] = env->gpr[i];
477
478 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
479 if (ret < 0)
480 return ret;
481
482 if (env->tlb_dirty) {
483 kvm_sw_tlb_put(cpu);
484 env->tlb_dirty = false;
485 }
486
487 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
488 struct kvm_sregs sregs;
489
490 sregs.pvr = env->spr[SPR_PVR];
491
492 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
493
494 /* Sync SLB */
495 #ifdef TARGET_PPC64
496 for (i = 0; i < 64; i++) {
497 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
498 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
499 }
500 #endif
501
502 /* Sync SRs */
503 for (i = 0; i < 16; i++) {
504 sregs.u.s.ppc32.sr[i] = env->sr[i];
505 }
506
507 /* Sync BATs */
508 for (i = 0; i < 8; i++) {
509 /* Beware. We have to swap upper and lower bits here */
510 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
511 | env->DBAT[1][i];
512 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
513 | env->IBAT[1][i];
514 }
515
516 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
517 if (ret) {
518 return ret;
519 }
520 }
521
522 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
523 uint64_t hior = env->spr[SPR_HIOR];
524 struct kvm_one_reg reg = {
525 .id = KVM_REG_PPC_HIOR,
526 .addr = (uintptr_t) &hior,
527 };
528
529 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
530 if (ret) {
531 return ret;
532 }
533 }
534
535 return ret;
536 }
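/* The HIOR write above uses the generic one-reg interface: a struct
 * kvm_one_reg names the register by id and points at a userspace buffer.
 * A sketch of reading it back the same way (assuming the host headers also
 * provide KVM_GET_ONE_REG, which was added alongside KVM_SET_ONE_REG):
 *
 *     uint64_t hior;
 *     struct kvm_one_reg reg = {
 *         .id   = KVM_REG_PPC_HIOR,
 *         .addr = (uintptr_t)&hior,
 *     };
 *     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 */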
537
538 int kvm_arch_get_registers(CPUState *cs)
539 {
540 PowerPCCPU *cpu = POWERPC_CPU(cs);
541 CPUPPCState *env = &cpu->env;
542 struct kvm_regs regs;
543 struct kvm_sregs sregs;
544 uint32_t cr;
545 int i, ret;
546
547 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
548 if (ret < 0)
549 return ret;
550
551 cr = regs.cr;
552 for (i = 7; i >= 0; i--) {
553 env->crf[i] = cr & 15;
554 cr >>= 4;
555 }
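/* For example, cr == 0x48000000 unpacks to crf[0] == 0x4, crf[1] == 0x8 and
 * crf[2..7] == 0: CR0 is the most significant nibble, matching the PowerPC
 * numbering of the condition register fields. */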
556
557 env->ctr = regs.ctr;
558 env->lr = regs.lr;
559 env->xer = regs.xer;
560 env->msr = regs.msr;
561 env->nip = regs.pc;
562
563 env->spr[SPR_SRR0] = regs.srr0;
564 env->spr[SPR_SRR1] = regs.srr1;
565
566 env->spr[SPR_SPRG0] = regs.sprg0;
567 env->spr[SPR_SPRG1] = regs.sprg1;
568 env->spr[SPR_SPRG2] = regs.sprg2;
569 env->spr[SPR_SPRG3] = regs.sprg3;
570 env->spr[SPR_SPRG4] = regs.sprg4;
571 env->spr[SPR_SPRG5] = regs.sprg5;
572 env->spr[SPR_SPRG6] = regs.sprg6;
573 env->spr[SPR_SPRG7] = regs.sprg7;
574
575 env->spr[SPR_BOOKE_PID] = regs.pid;
576
577 for (i = 0; i < 32; i++)
578 env->gpr[i] = regs.gpr[i];
579
580 if (cap_booke_sregs) {
581 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
582 if (ret < 0) {
583 return ret;
584 }
585
586 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
587 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
588 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
589 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
590 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
591 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
592 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
593 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
594 env->spr[SPR_DECR] = sregs.u.e.dec;
595 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
596 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
597 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
598 }
599
600 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
601 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
602 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
603 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
604 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
605 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
606 }
607
608 if (sregs.u.e.features & KVM_SREGS_E_64) {
609 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
610 }
611
612 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
613 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
614 }
615
616 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
617 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
618 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
619 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
620 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
621 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
622 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
623 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
624 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
625 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
626 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
627 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
628 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
629 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
630 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
631 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
632 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
633
634 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
635 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
636 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
637 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
638 }
639
640 if (sregs.u.e.features & KVM_SREGS_E_PM) {
641 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
642 }
643
644 if (sregs.u.e.features & KVM_SREGS_E_PC) {
645 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
646 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
647 }
648 }
649
650 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
651 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
652 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
653 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
654 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
655 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
656 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
657 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
658 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
659 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
660 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
661 }
662
663 if (sregs.u.e.features & KVM_SREGS_EXP) {
664 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
665 }
666
667 if (sregs.u.e.features & KVM_SREGS_E_PD) {
668 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
669 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
670 }
671
672 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
673 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
674 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
675 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
676
677 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
678 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
679 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
680 }
681 }
682 }
683
684 if (cap_segstate) {
685 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
686 if (ret < 0) {
687 return ret;
688 }
689
690 ppc_store_sdr1(env, sregs.u.s.sdr1);
691
692 /* Sync SLB */
693 #ifdef TARGET_PPC64
694 for (i = 0; i < 64; i++) {
695 ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
696 sregs.u.s.ppc64.slb[i].slbv);
697 }
698 #endif
699
700 /* Sync SRs */
701 for (i = 0; i < 16; i++) {
702 env->sr[i] = sregs.u.s.ppc32.sr[i];
703 }
704
705 /* Sync BATs */
706 for (i = 0; i < 8; i++) {
707 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
708 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
709 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
710 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
711 }
712 }
713
714 return 0;
715 }
716
717 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
718 {
719 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
720
721 if (irq != PPC_INTERRUPT_EXT) {
722 return 0;
723 }
724
725 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
726 return 0;
727 }
728
729 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
730
731 return 0;
732 }
733
734 #if defined(TARGET_PPCEMB)
735 #define PPC_INPUT_INT PPC40x_INPUT_INT
736 #elif defined(TARGET_PPC64)
737 #define PPC_INPUT_INT PPC970_INPUT_INT
738 #else
739 #define PPC_INPUT_INT PPC6xx_INPUT_INT
740 #endif
741
742 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
743 {
744 PowerPCCPU *cpu = POWERPC_CPU(cs);
745 CPUPPCState *env = &cpu->env;
746 int r;
747 unsigned irq;
748
749 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
750 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
751 if (!cap_interrupt_level &&
752 run->ready_for_interrupt_injection &&
753 (env->interrupt_request & CPU_INTERRUPT_HARD) &&
754 (env->irq_input_state & (1<<PPC_INPUT_INT)))
755 {
756 /* For now KVM disregards the 'irq' argument. However, in the
757 * future KVM could cache it in-kernel to avoid a heavyweight exit
758 * when reading the UIC.
759 */
760 irq = KVM_INTERRUPT_SET;
761
762 dprintf("injected interrupt %d\n", irq);
763 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
764 if (r < 0)
765 printf("cpu %d fail inject %x\n", env->cpu_index, irq);
766
767 /* Always wake up soon in case the interrupt was level based */
768 qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
769 (get_ticks_per_sec() / 50));
770 }
771
772 /* We don't know if there are more interrupts pending after this. However,
773 * the guest will return to userspace in the course of handling this one
774 * anyway, so we will get a chance to deliver the rest. */
775 }
776
777 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
778 {
779 }
780
781 int kvm_arch_process_async_events(CPUState *cs)
782 {
783 PowerPCCPU *cpu = POWERPC_CPU(cs);
784 return cpu->env.halted;
785 }
786
787 static int kvmppc_handle_halt(CPUPPCState *env)
788 {
789 if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
790 env->halted = 1;
791 env->exception_index = EXCP_HLT;
792 }
793
794 return 0;
795 }
796
797 /* map dcr access to existing qemu dcr emulation */
798 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
799 {
800 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
801 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
802
803 return 0;
804 }
805
806 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
807 {
808 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
809 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
810
811 return 0;
812 }
813
814 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
815 {
816 PowerPCCPU *cpu = POWERPC_CPU(cs);
817 CPUPPCState *env = &cpu->env;
818 int ret;
819
820 switch (run->exit_reason) {
821 case KVM_EXIT_DCR:
822 if (run->dcr.is_write) {
823 dprintf("handle dcr write\n");
824 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
825 } else {
826 dprintf("handle dcr read\n");
827 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
828 }
829 break;
830 case KVM_EXIT_HLT:
831 dprintf("handle halt\n");
832 ret = kvmppc_handle_halt(env);
833 break;
834 #ifdef CONFIG_PSERIES
835 case KVM_EXIT_PAPR_HCALL:
836 dprintf("handle PAPR hypercall\n");
837 run->papr_hcall.ret = spapr_hypercall(cpu,
838 run->papr_hcall.nr,
839 run->papr_hcall.args);
840 ret = 0;
841 break;
842 #endif
843 default:
844 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
845 ret = -1;
846 break;
847 }
848
849 return ret;
850 }
851
852 static int read_cpuinfo(const char *field, char *value, int len)
853 {
854 FILE *f;
855 int ret = -1;
856 int field_len = strlen(field);
857 char line[512];
858
859 f = fopen("/proc/cpuinfo", "r");
860 if (!f) {
861 return -1;
862 }
863
864 do {
865 if (!fgets(line, sizeof(line), f)) {
866 break;
867 }
868 if (!strncmp(line, field, field_len)) {
869 pstrcpy(value, len, line);
870 ret = 0;
871 break;
872 }
873 } while (*line);
874
875 fclose(f);
876
877 return ret;
878 }
879
880 uint32_t kvmppc_get_tbfreq(void)
881 {
882 char line[512];
883 char *ns;
884 uint32_t retval = get_ticks_per_sec();
885
886 if (read_cpuinfo("timebase", line, sizeof(line))) {
887 return retval;
888 }
889
890 if (!(ns = strchr(line, ':'))) {
891 return retval;
892 }
893
894 ns++;
895
896 retval = atoi(ns);
897 return retval;
898 }
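/* For example (value assumed): a host /proc/cpuinfo line of
 * "timebase : 512000000" makes read_cpuinfo() return the whole line,
 * strchr() finds the ':', and atoi() yields 512000000, i.e. a 512 MHz
 * timebase; if anything is missing we fall back to get_ticks_per_sec().
 */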
899
900 /* Try to find a device tree node for a CPU with clock-frequency property */
901 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
902 {
903 struct dirent *dirp;
904 DIR *dp;
905
906 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
907 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
908 return -1;
909 }
910
911 buf[0] = '\0';
912 while ((dirp = readdir(dp)) != NULL) {
913 FILE *f;
914 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
915 dirp->d_name);
916 f = fopen(buf, "r");
917 if (f) {
918 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
919 fclose(f);
920 break;
921 }
922 buf[0] = '\0';
923 }
924 closedir(dp);
925 if (buf[0] == '\0') {
926 printf("Unknown host!\n");
927 return -1;
928 }
929
930 return 0;
931 }
932
933 /* Read a CPU node property from the host device tree that's a single
934 * integer (32-bit or 64-bit). Returns -1 if the property can't be
935 * found or opened, and 0 if the value isn't in a format we
936 * understand */
937 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
938 {
939 char buf[PATH_MAX];
940 union {
941 uint32_t v32;
942 uint64_t v64;
943 } u;
944 FILE *f;
945 int len;
946
947 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
948 return -1;
949 }
950
951 strncat(buf, "/", sizeof(buf) - strlen(buf));
952 strncat(buf, propname, sizeof(buf) - strlen(buf));
953
954 f = fopen(buf, "rb");
955 if (!f) {
956 return -1;
957 }
958
959 len = fread(&u, 1, sizeof(u), f);
960 fclose(f);
961 switch (len) {
962 case 4:
963 /* property is a 32-bit quantity */
964 return be32_to_cpu(u.v32);
965 case 8:
966 return be64_to_cpu(u.v64);
967 }
968
969 return 0;
970 }
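/* For example (value assumed): a 4-byte "clock-frequency" property holding
 * the big-endian bytes ee 6b 28 00 is returned as
 * be32_to_cpu(0xee6b2800) == 4000000000, i.e. a 4 GHz clock; an 8-byte
 * property is handled the same way through be64_to_cpu().
 */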
971
972 uint64_t kvmppc_get_clockfreq(void)
973 {
974 return kvmppc_read_int_cpu_dt("clock-frequency");
975 }
976
977 uint32_t kvmppc_get_vmx(void)
978 {
979 return kvmppc_read_int_cpu_dt("ibm,vmx");
980 }
981
982 uint32_t kvmppc_get_dfp(void)
983 {
984 return kvmppc_read_int_cpu_dt("ibm,dfp");
985 }
986
987 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
988 {
989 uint32_t *hc = (uint32_t*)buf;
990
991 struct kvm_ppc_pvinfo pvinfo;
992
993 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
994 !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
995 memcpy(buf, pvinfo.hcall, buf_len);
996
997 return 0;
998 }
999
1000 /*
1001 * Fallback to always fail hypercalls:
1002 *
1003 * li r3, -1
1004 * nop
1005 * nop
1006 * nop
1007 */
1008
1009 hc[0] = 0x3860ffff;
1010 hc[1] = 0x60000000;
1011 hc[2] = 0x60000000;
1012 hc[3] = 0x60000000;
1013
1014 return 0;
1015 }
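/* Decoding the fallback sequence: 0x3860ffff is "li r3, -1" (addi r3, 0, -1),
 * which leaves an error value in r3, and 0x60000000 is "ori 0, 0, 0", the
 * canonical PowerPC nop, so any hypercall attempt fails cleanly when KVM
 * can't tell us the real hcall sequence.
 */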
1016
1017 void kvmppc_set_papr(PowerPCCPU *cpu)
1018 {
1019 CPUPPCState *env = &cpu->env;
1020 CPUState *cs = CPU(cpu);
1021 struct kvm_enable_cap cap = {};
1022 int ret;
1023
1024 cap.cap = KVM_CAP_PPC_PAPR;
1025 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1026
1027 if (ret) {
1028 cpu_abort(env, "This KVM version does not support PAPR\n");
1029 }
1030 }
1031
1032 int kvmppc_smt_threads(void)
1033 {
1034 return cap_ppc_smt ? cap_ppc_smt : 1;
1035 }
1036
1037 #ifdef TARGET_PPC64
1038 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1039 {
1040 void *rma;
1041 off_t size;
1042 int fd;
1043 struct kvm_allocate_rma ret;
1044 MemoryRegion *rma_region;
1045
1046 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1047 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1048 * not necessary on this hardware
1049 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1050 *
1051 * FIXME: We should allow the user to force contiguous RMA
1052 * allocation in the cap_ppc_rma==1 case.
1053 */
1054 if (cap_ppc_rma < 2) {
1055 return 0;
1056 }
1057
1058 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1059 if (fd < 0) {
1060 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1061 strerror(errno));
1062 return -1;
1063 }
1064
1065 size = MIN(ret.rma_size, 256ul << 20);
1066
1067 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1068 if (rma == MAP_FAILED) {
1069 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1070 return -1;
1071 }
1072
1073 rma_region = g_new(MemoryRegion, 1);
1074 memory_region_init_ram_ptr(rma_region, name, size, rma);
1075 vmstate_register_ram_global(rma_region);
1076 memory_region_add_subregion(sysmem, 0, rma_region);
1077
1078 return size;
1079 }
1080
1081 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1082 {
1083 if (cap_ppc_rma >= 2) {
1084 return current_size;
1085 }
1086 return MIN(current_size,
1087 getrampagesize() << (hash_shift - 7));
1088 }
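/* Worked example (rationale assumed from the VRMA layout: roughly one usable
 * HPTE group, 128 bytes of hash table, per real-mode page): with a 16 MB hash
 * table (hash_shift == 24) and 4 KB backing pages this clamps the RMA to
 * 4096 << 17 == 512 MB; with 64 KB pages the limit would be 8 GB, so
 * current_size usually wins.
 */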
1089 #endif
1090
1091 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1092 {
1093 struct kvm_create_spapr_tce args = {
1094 .liobn = liobn,
1095 .window_size = window_size,
1096 };
1097 long len;
1098 int fd;
1099 void *table;
1100
1101 /* Must set fd to -1 so we don't try to munmap when called for
1102 * destroying the table, which the upper layers -will- do
1103 */
1104 *pfd = -1;
1105 if (!cap_spapr_tce) {
1106 return NULL;
1107 }
1108
1109 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1110 if (fd < 0) {
1111 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1112 liobn);
1113 return NULL;
1114 }
1115
1116 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1117 /* FIXME: round this up to page size */
1118
1119 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1120 if (table == MAP_FAILED) {
1121 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1122 liobn);
1123 close(fd);
1124 return NULL;
1125 }
1126
1127 *pfd = fd;
1128 return table;
1129 }
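/* Usage sketch: for a 256 MB DMA window the table holds
 * 256 MB / SPAPR_TCE_PAGE_SIZE (4 KB) == 65536 TCEs, each an 8-byte sPAPRTCE
 * in this tree, i.e. a 512 KB mmap shared with the kernel; the caller keeps
 * *pfd so that kvmppc_remove_spapr_tce() below can munmap() and close() it
 * again.
 */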
1130
1131 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1132 {
1133 long len;
1134
1135 if (fd < 0) {
1136 return -1;
1137 }
1138
1139 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
1140 if ((munmap(table, len) < 0) ||
1141 (close(fd) < 0)) {
1142 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1143 strerror(errno));
1144 /* Leak the table */
1145 }
1146
1147 return 0;
1148 }
1149
1150 int kvmppc_reset_htab(int shift_hint)
1151 {
1152 uint32_t shift = shift_hint;
1153
1154 if (!kvm_enabled()) {
1155 /* Full emulation, tell caller to allocate htab itself */
1156 return 0;
1157 }
1158 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1159 int ret;
1160 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1161 if (ret == -ENOTTY) {
1162 /* At least some versions of PR KVM advertise the
1163 * capability, but don't implement the ioctl(). Oops.
1164 * Return 0 so that we allocate the htab in qemu, as is
1165 * correct for PR. */
1166 return 0;
1167 } else if (ret < 0) {
1168 return ret;
1169 }
1170 return shift;
1171 }
1172
1173 /* We have a kernel that predates the htab reset calls. For PR
1174 * KVM, we need to allocate the htab ourselves; an HV KVM of
1175 * this era has already allocated a 16MB fixed size hash table.
1176 * Kernels of this era have the GET_PVINFO capability
1177 * only on PR, so we use this hack to determine the right
1178 * answer. */
1179 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1180 /* PR - tell caller to allocate htab */
1181 return 0;
1182 } else {
1183 /* HV - assume 16MB kernel allocated htab */
1184 return 24;
1185 }
1186 }
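/* For example: a return value of 24 tells the caller the kernel owns (or
 * wants) a 1 << 24 == 16 MB hash table, while a return of 0 means QEMU must
 * allocate the htab itself, as it does for PR KVM and for full emulation.
 */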
1187
1188 static inline uint32_t mfpvr(void)
1189 {
1190 uint32_t pvr;
1191
1192 asm ("mfpvr %0"
1193 : "=r"(pvr));
1194 return pvr;
1195 }
1196
1197 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
1198 {
1199 if (on) {
1200 *word |= flags;
1201 } else {
1202 *word &= ~flags;
1203 }
1204 }
1205
1206 const ppc_def_t *kvmppc_host_cpu_def(void)
1207 {
1208 uint32_t host_pvr = mfpvr();
1209 const ppc_def_t *base_spec;
1210 ppc_def_t *spec;
1211 uint32_t vmx = kvmppc_get_vmx();
1212 uint32_t dfp = kvmppc_get_dfp();
1213
1214 base_spec = ppc_find_by_pvr(host_pvr);
1215
1216 spec = g_malloc0(sizeof(*spec));
1217 memcpy(spec, base_spec, sizeof(*spec));
1218
1219 /* Now fix up the spec with information we can query from the host */
1220
1221 if (vmx != -1) {
1222 /* Only override when we know what the host supports */
1223 alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
1224 alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
1225 }
1226 if (dfp != -1) {
1227 /* Only override when we know what the host supports */
1228 alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
1229 }
1230
1231 return spec;
1232 }
1233
1234 int kvmppc_fixup_cpu(CPUPPCState *env)
1235 {
1236 int smt;
1237
1238 /* Adjust cpu index for SMT */
1239 smt = kvmppc_smt_threads();
1240 env->cpu_index = (env->cpu_index / smp_threads) * smt
1241 + (env->cpu_index % smp_threads);
1242
1243 return 0;
1244 }
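/* Worked example: with smp_threads == 2 on the command line and a host core
 * exposing kvmppc_smt_threads() == 4, guest vcpu indexes 0, 1, 2, 3 become
 * 0, 1, 4, 5, so each guest "core" starts on a host SMT-4 thread boundary as
 * KVM expects.
 */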
1245
1246
1247 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1248 {
1249 return true;
1250 }
1251
1252 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1253 {
1254 return 1;
1255 }
1256
1257 int kvm_arch_on_sigbus(int code, void *addr)
1258 {
1259 return 1;
1260 }