git.proxmox.com Git - qemu.git/blob - target-ppc/kvm.c
cpu: Move kvm_state field into CPUState
1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
22
23 #include <linux/kvm.h>
24
25 #include "qemu-common.h"
26 #include "qemu-timer.h"
27 #include "sysemu.h"
28 #include "kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "cpus.h"
32 #include "device_tree.h"
35
36 #include "hw/sysbus.h"
37 #include "hw/spapr.h"
38 #include "hw/spapr_vio.h"
39
40 //#define DEBUG_KVM
41
42 #ifdef DEBUG_KVM
43 #define dprintf(fmt, ...) \
44 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
45 #else
46 #define dprintf(fmt, ...) \
47 do { } while (0)
48 #endif
49
50 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
51
52 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
53 KVM_CAP_LAST_INFO
54 };
55
56 static int cap_interrupt_unset = false;
57 static int cap_interrupt_level = false;
58 static int cap_segstate;
59 static int cap_booke_sregs;
60 static int cap_ppc_smt;
61 static int cap_ppc_rma;
62 static int cap_spapr_tce;
63 static int cap_hior;
64
65 /* XXX We have a race condition where we actually have a level triggered
66 * interrupt, but the infrastructure can't expose that yet, so the guest
67 * takes but ignores it, goes to sleep and never gets notified that there's
68 * still an interrupt pending.
69 *
70 * As a quick workaround, let's just wake up again 20 ms after we injected
71 * an interrupt. That way we can ensure that we're always reinjecting
72 * interrupts in case the guest swallowed them.
73 */
74 static QEMUTimer *idle_timer;
75
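/* Timer callback for idle_timer: kick the vCPU out of KVM so it
 * re-checks for pending interrupts (see the workaround above). */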
76 static void kvm_kick_cpu(void *opaque)
77 {
78 PowerPCCPU *cpu = opaque;
79
80 qemu_cpu_kick(CPU(cpu));
81 }
82
83 int kvm_arch_init(KVMState *s)
84 {
85 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
86 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
87 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
88 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
89 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
90 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
91 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
92 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
93
94 if (!cap_interrupt_level) {
95 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
96 "VM to stall at times!\n");
97 }
98
99 return 0;
100 }
101
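/* Tell KVM which PVR the guest uses, via the sregs interface.
 * Skipped on BookE; requires the segstate capability on Book3S. */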
102 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
103 {
104 CPUPPCState *cenv = &cpu->env;
105 CPUState *cs = CPU(cpu);
106 struct kvm_sregs sregs;
107 int ret;
108
109 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
110 /* What we're really trying to say is "if we're on BookE, we use
111 the native PVR for now". This is the only sane way to check
112 it, though, so we potentially confuse users into thinking they can
113 run BookE guests on Book3S. Let's hope nobody dares to try :) */
114 return 0;
115 } else {
116 if (!cap_segstate) {
117 fprintf(stderr, "kvm error: missing PVR setting capability\n");
118 return -ENOSYS;
119 }
120 }
121
122 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
123 if (ret) {
124 return ret;
125 }
126
127 sregs.pvr = cenv->spr[SPR_PVR];
128 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
129 }
130
131 /* Set up a shared TLB array with KVM */
132 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
133 {
134 CPUPPCState *env = &cpu->env;
135 CPUState *cs = CPU(cpu);
136 struct kvm_book3e_206_tlb_params params = {};
137 struct kvm_config_tlb cfg = {};
138 struct kvm_enable_cap encap = {};
139 unsigned int entries = 0;
140 int ret, i;
141
142 if (!kvm_enabled() ||
143 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
144 return 0;
145 }
146
147 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
148
149 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
150 params.tlb_sizes[i] = booke206_tlb_size(env, i);
151 params.tlb_ways[i] = booke206_tlb_ways(env, i);
152 entries += params.tlb_sizes[i];
153 }
154
155 assert(entries == env->nb_tlb);
156 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
157
158 env->tlb_dirty = true;
159
160 cfg.array = (uintptr_t)env->tlb.tlbm;
161 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
162 cfg.params = (uintptr_t)&params;
163 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
164
165 encap.cap = KVM_CAP_SW_TLB;
166 encap.args[0] = (uintptr_t)&cfg;
167
168 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
169 if (ret < 0) {
170 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
171 __func__, strerror(-ret));
172 return ret;
173 }
174
175 env->kvm_sw_tlb = true;
176 return 0;
177 }
178
179
180 #if defined(TARGET_PPC64)
181 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
182 struct kvm_ppc_smmu_info *info)
183 {
184 CPUPPCState *env = &cpu->env;
185 CPUState *cs = CPU(cpu);
186
187 memset(info, 0, sizeof(*info));
188
189 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
190 * need to "guess" what the supported page sizes are.
191 *
192 * For that to work we make a few assumptions:
193 *
194 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
195 * KVM which only supports 4K and 16M pages, but supports them
196 * regardless of the backing store characteristics. We also don't
197 * support 1T segments.
198 *
199 * This is safe because if HV KVM ever supports that capability or PR
200 * KVM grows support for more page/segment sizes, those versions
201 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
202 * will not hit this fallback.
203 *
204 * - Else we are running HV KVM. This means we only support page
205 * sizes that fit in the backing store. Additionally we only
206 * advertise 64K pages if the processor is ARCH 2.06 and we assume
207 * P7 encodings for the SLB and hash table. Here too, we assume
208 * support for any newer processor will mean a kernel that
209 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
210 * this fallback.
211 */
212 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
213 /* No flags */
214 info->flags = 0;
215 info->slb_size = 64;
216
217 /* Standard 4k base page size segment */
218 info->sps[0].page_shift = 12;
219 info->sps[0].slb_enc = 0;
220 info->sps[0].enc[0].page_shift = 12;
221 info->sps[0].enc[0].pte_enc = 0;
222
223 /* Standard 16M large page size segment */
224 info->sps[1].page_shift = 24;
225 info->sps[1].slb_enc = SLB_VSID_L;
226 info->sps[1].enc[0].page_shift = 24;
227 info->sps[1].enc[0].pte_enc = 0;
228 } else {
229 int i = 0;
230
231 /* HV KVM has backing store size restrictions */
232 info->flags = KVM_PPC_PAGE_SIZES_REAL;
233
234 if (env->mmu_model & POWERPC_MMU_1TSEG) {
235 info->flags |= KVM_PPC_1T_SEGMENTS;
236 }
237
238 if (env->mmu_model == POWERPC_MMU_2_06) {
239 info->slb_size = 32;
240 } else {
241 info->slb_size = 64;
242 }
243
244 /* Standard 4k base page size segment */
245 info->sps[i].page_shift = 12;
246 info->sps[i].slb_enc = 0;
247 info->sps[i].enc[0].page_shift = 12;
248 info->sps[i].enc[0].pte_enc = 0;
249 i++;
250
251 /* 64K on MMU 2.06 */
252 if (env->mmu_model == POWERPC_MMU_2_06) {
253 info->sps[i].page_shift = 16;
254 info->sps[i].slb_enc = 0x110;
255 info->sps[i].enc[0].page_shift = 16;
256 info->sps[i].enc[0].pte_enc = 1;
257 i++;
258 }
259
260 /* Standard 16M large page size segment */
261 info->sps[i].page_shift = 24;
262 info->sps[i].slb_enc = SLB_VSID_L;
263 info->sps[i].enc[0].page_shift = 24;
264 info->sps[i].enc[0].pte_enc = 0;
265 }
266 }
267
268 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
269 {
270 CPUState *cs = CPU(cpu);
271 int ret;
272
273 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
274 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
275 if (ret == 0) {
276 return;
277 }
278 }
279
280 kvm_get_fallback_smmu_info(cpu, info);
281 }
282
283 static long getrampagesize(void)
284 {
285 struct statfs fs;
286 int ret;
287
288 if (!mem_path) {
289 /* guest RAM is backed by normal anonymous pages */
290 return getpagesize();
291 }
292
293 do {
294 ret = statfs(mem_path, &fs);
295 } while (ret != 0 && errno == EINTR);
296
297 if (ret != 0) {
298 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
299 strerror(errno));
300 exit(1);
301 }
302
303 #define HUGETLBFS_MAGIC 0x958458f6
304
305 if (fs.f_type != HUGETLBFS_MAGIC) {
306 /* Explicit mempath, but it's ordinary pages */
307 return getpagesize();
308 }
309
310 /* It's hugetlbfs, so return the huge page size */
311 return fs.f_bsize;
312 }
313
314 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
315 {
316 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
317 return true;
318 }
319
320 return (1ul << shift) <= rampgsize;
321 }
322
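/* Query the host MMU capabilities and trim env->sps down to the page
 * sizes that the host kernel and RAM backing can actually support. */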
323 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
324 {
325 static struct kvm_ppc_smmu_info smmu_info;
326 static bool has_smmu_info;
327 CPUPPCState *env = &cpu->env;
328 long rampagesize;
329 int iq, ik, jq, jk;
330
331 /* We only handle page sizes for 64-bit server guests for now */
332 if (!(env->mmu_model & POWERPC_MMU_64)) {
333 return;
334 }
335
336 /* Collect MMU info from the kernel if we haven't already */
337 if (!has_smmu_info) {
338 kvm_get_smmu_info(cpu, &smmu_info);
339 has_smmu_info = true;
340 }
341
342 rampagesize = getrampagesize();
343
344 /* Convert to QEMU form */
345 memset(&env->sps, 0, sizeof(env->sps));
346
347 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
348 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
349 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
350
351 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
352 ksps->page_shift)) {
353 continue;
354 }
355 qsps->page_shift = ksps->page_shift;
356 qsps->slb_enc = ksps->slb_enc;
357 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
358 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
359 ksps->enc[jk].page_shift)) {
360 continue;
361 }
362 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
363 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
364 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
365 break;
366 }
367 }
368 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
369 break;
370 }
371 }
372 env->slb_nr = smmu_info.slb_size;
373 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
374 env->mmu_model |= POWERPC_MMU_1TSEG;
375 } else {
376 env->mmu_model &= ~POWERPC_MMU_1TSEG;
377 }
378 }
379 #else /* defined (TARGET_PPC64) */
380
381 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
382 {
383 }
384
385 #endif /* !defined (TARGET_PPC64) */
386
387 int kvm_arch_init_vcpu(CPUState *cs)
388 {
389 PowerPCCPU *cpu = POWERPC_CPU(cs);
390 CPUPPCState *cenv = &cpu->env;
391 int ret;
392
393 /* Gather server mmu info from KVM and update the CPU state */
394 kvm_fixup_page_sizes(cpu);
395
396 /* Synchronize sregs with kvm */
397 ret = kvm_arch_sync_sregs(cpu);
398 if (ret) {
399 return ret;
400 }
401
402 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);
403
404 /* Some targets support access to KVM's guest TLB. */
405 switch (cenv->mmu_model) {
406 case POWERPC_MMU_BOOKE206:
407 ret = kvm_booke206_tlb_init(cpu);
408 break;
409 default:
410 break;
411 }
412
413 return ret;
414 }
415
416 void kvm_arch_reset_vcpu(CPUState *cpu)
417 {
418 }
419
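/* Push QEMU's shadow TLB back to KVM by marking every entry dirty in
 * the shared array set up by kvm_booke206_tlb_init(). */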
420 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
421 {
422 CPUPPCState *env = &cpu->env;
423 CPUState *cs = CPU(cpu);
424 struct kvm_dirty_tlb dirty_tlb;
425 unsigned char *bitmap;
426 int ret;
427
428 if (!env->kvm_sw_tlb) {
429 return;
430 }
431
432 bitmap = g_malloc((env->nb_tlb + 7) / 8);
433 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
434
435 dirty_tlb.bitmap = (uintptr_t)bitmap;
436 dirty_tlb.num_dirty = env->nb_tlb;
437
438 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
439 if (ret) {
440 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
441 __func__, strerror(-ret));
442 }
443
444 g_free(bitmap);
445 }
446
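/* Copy QEMU's CPU state into the kernel: GPRs and common SPRs always,
 * plus segment/BAT registers and HIOR when the capabilities allow. */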
447 int kvm_arch_put_registers(CPUState *cs, int level)
448 {
449 PowerPCCPU *cpu = POWERPC_CPU(cs);
450 CPUPPCState *env = &cpu->env;
451 struct kvm_regs regs;
452 int ret;
453 int i;
454
455 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
456 if (ret < 0) {
457 return ret;
458 }
459
460 regs.ctr = env->ctr;
461 regs.lr = env->lr;
462 regs.xer = env->xer;
463 regs.msr = env->msr;
464 regs.pc = env->nip;
465
466 regs.srr0 = env->spr[SPR_SRR0];
467 regs.srr1 = env->spr[SPR_SRR1];
468
469 regs.sprg0 = env->spr[SPR_SPRG0];
470 regs.sprg1 = env->spr[SPR_SPRG1];
471 regs.sprg2 = env->spr[SPR_SPRG2];
472 regs.sprg3 = env->spr[SPR_SPRG3];
473 regs.sprg4 = env->spr[SPR_SPRG4];
474 regs.sprg5 = env->spr[SPR_SPRG5];
475 regs.sprg6 = env->spr[SPR_SPRG6];
476 regs.sprg7 = env->spr[SPR_SPRG7];
477
478 regs.pid = env->spr[SPR_BOOKE_PID];
479
480 for (i = 0; i < 32; i++)
481 regs.gpr[i] = env->gpr[i];
482
483 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
484 if (ret < 0)
485 return ret;
486
487 if (env->tlb_dirty) {
488 kvm_sw_tlb_put(cpu);
489 env->tlb_dirty = false;
490 }
491
492 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
493 struct kvm_sregs sregs;
494
495 sregs.pvr = env->spr[SPR_PVR];
496
497 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
498
499 /* Sync SLB */
500 #ifdef TARGET_PPC64
501 for (i = 0; i < 64; i++) {
502 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
503 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
504 }
505 #endif
506
507 /* Sync SRs */
508 for (i = 0; i < 16; i++) {
509 sregs.u.s.ppc32.sr[i] = env->sr[i];
510 }
511
512 /* Sync BATs */
513 for (i = 0; i < 8; i++) {
514 /* Beware. We have to swap the upper and lower 32-bit halves here */
515 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
516 | env->DBAT[1][i];
517 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
518 | env->IBAT[1][i];
519 }
520
521 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
522 if (ret) {
523 return ret;
524 }
525 }
526
527 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
528 uint64_t hior = env->spr[SPR_HIOR];
529 struct kvm_one_reg reg = {
530 .id = KVM_REG_PPC_HIOR,
531 .addr = (uintptr_t) &hior,
532 };
533
534 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
535 if (ret) {
536 return ret;
537 }
538 }
539
540 return ret;
541 }
542
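/* Read the vCPU state back from KVM into CPUPPCState: GPRs, common
 * SPRs, and BookE or Book3S sregs depending on the host capabilities. */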
543 int kvm_arch_get_registers(CPUState *cs)
544 {
545 PowerPCCPU *cpu = POWERPC_CPU(cs);
546 CPUPPCState *env = &cpu->env;
547 struct kvm_regs regs;
548 struct kvm_sregs sregs;
549 uint32_t cr;
550 int i, ret;
551
552 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
553 if (ret < 0)
554 return ret;
555
556 cr = regs.cr;
557 for (i = 7; i >= 0; i--) {
558 env->crf[i] = cr & 15;
559 cr >>= 4;
560 }
561
562 env->ctr = regs.ctr;
563 env->lr = regs.lr;
564 env->xer = regs.xer;
565 env->msr = regs.msr;
566 env->nip = regs.pc;
567
568 env->spr[SPR_SRR0] = regs.srr0;
569 env->spr[SPR_SRR1] = regs.srr1;
570
571 env->spr[SPR_SPRG0] = regs.sprg0;
572 env->spr[SPR_SPRG1] = regs.sprg1;
573 env->spr[SPR_SPRG2] = regs.sprg2;
574 env->spr[SPR_SPRG3] = regs.sprg3;
575 env->spr[SPR_SPRG4] = regs.sprg4;
576 env->spr[SPR_SPRG5] = regs.sprg5;
577 env->spr[SPR_SPRG6] = regs.sprg6;
578 env->spr[SPR_SPRG7] = regs.sprg7;
579
580 env->spr[SPR_BOOKE_PID] = regs.pid;
581
582 for (i = 0; i < 32; i++)
583 env->gpr[i] = regs.gpr[i];
584
585 if (cap_booke_sregs) {
586 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
587 if (ret < 0) {
588 return ret;
589 }
590
591 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
592 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
593 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
594 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
595 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
596 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
597 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
598 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
599 env->spr[SPR_DECR] = sregs.u.e.dec;
600 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
601 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
602 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
603 }
604
605 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
606 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
607 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
608 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
609 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
610 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
611 }
612
613 if (sregs.u.e.features & KVM_SREGS_E_64) {
614 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
615 }
616
617 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
618 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
619 }
620
621 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
622 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
623 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
624 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
625 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
626 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
627 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
628 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
629 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
630 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
631 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
632 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
633 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
634 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
635 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
636 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
637 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
638
639 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
640 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
641 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
642 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
643 }
644
645 if (sregs.u.e.features & KVM_SREGS_E_PM) {
646 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
647 }
648
649 if (sregs.u.e.features & KVM_SREGS_E_PC) {
650 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
651 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
652 }
653 }
654
655 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
656 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
657 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
658 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
659 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
660 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
661 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
662 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
663 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
664 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
665 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
666 }
667
668 if (sregs.u.e.features & KVM_SREGS_EXP) {
669 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
670 }
671
672 if (sregs.u.e.features & KVM_SREGS_E_PD) {
673 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
674 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
675 }
676
677 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
678 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
679 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
680 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
681
682 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
683 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
684 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
685 }
686 }
687 }
688
689 if (cap_segstate) {
690 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
691 if (ret < 0) {
692 return ret;
693 }
694
695 ppc_store_sdr1(env, sregs.u.s.sdr1);
696
697 /* Sync SLB */
698 #ifdef TARGET_PPC64
699 for (i = 0; i < 64; i++) {
700 ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
701 sregs.u.s.ppc64.slb[i].slbv);
702 }
703 #endif
704
705 /* Sync SRs */
706 for (i = 0; i < 16; i++) {
707 env->sr[i] = sregs.u.s.ppc32.sr[i];
708 }
709
710 /* Sync BATs */
711 for (i = 0; i < 8; i++) {
712 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
713 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
714 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
715 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
716 }
717 }
718
719 return 0;
720 }
721
722 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
723 {
724 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
725
726 if (irq != PPC_INTERRUPT_EXT) {
727 return 0;
728 }
729
730 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
731 return 0;
732 }
733
734 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
735
736 return 0;
737 }
738
739 #if defined(TARGET_PPCEMB)
740 #define PPC_INPUT_INT PPC40x_INPUT_INT
741 #elif defined(TARGET_PPC64)
742 #define PPC_INPUT_INT PPC970_INPUT_INT
743 #else
744 #define PPC_INPUT_INT PPC6xx_INPUT_INT
745 #endif
746
747 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
748 {
749 PowerPCCPU *cpu = POWERPC_CPU(cs);
750 CPUPPCState *env = &cpu->env;
751 int r;
752 unsigned irq;
753
754 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
755 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
756 if (!cap_interrupt_level &&
757 run->ready_for_interrupt_injection &&
758 (env->interrupt_request & CPU_INTERRUPT_HARD) &&
759 (env->irq_input_state & (1<<PPC_INPUT_INT)))
760 {
761 /* For now KVM disregards the 'irq' argument. However, in the
762 * future KVM could cache it in-kernel to avoid a heavyweight exit
763 * when reading the UIC.
764 */
765 irq = KVM_INTERRUPT_SET;
766
767 dprintf("injected interrupt %d\n", irq);
768 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
769 if (r < 0)
770 printf("cpu %d fail inject %x\n", env->cpu_index, irq);
771
772 /* Always wake up soon in case the interrupt was level based */
773 qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
774 (get_ticks_per_sec() / 50));
775 }
776
777 /* We don't know if there are more interrupts pending after this. However,
778 * the guest will return to userspace in the course of handling this one
779 * anyway, so we will get a chance to deliver the rest. */
780 }
781
782 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
783 {
784 }
785
786 int kvm_arch_process_async_events(CPUState *cs)
787 {
788 PowerPCCPU *cpu = POWERPC_CPU(cs);
789 return cpu->env.halted;
790 }
791
792 static int kvmppc_handle_halt(CPUPPCState *env)
793 {
794 if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
795 env->halted = 1;
796 env->exception_index = EXCP_HLT;
797 }
798
799 return 0;
800 }
801
802 /* map dcr access to existing qemu dcr emulation */
803 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
804 {
805 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
806 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
807
808 return 0;
809 }
810
811 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
812 {
813 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
814 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
815
816 return 0;
817 }
818
819 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
820 {
821 PowerPCCPU *cpu = POWERPC_CPU(cs);
822 CPUPPCState *env = &cpu->env;
823 int ret;
824
825 switch (run->exit_reason) {
826 case KVM_EXIT_DCR:
827 if (run->dcr.is_write) {
828 dprintf("handle dcr write\n");
829 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
830 } else {
831 dprintf("handle dcr read\n");
832 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
833 }
834 break;
835 case KVM_EXIT_HLT:
836 dprintf("handle halt\n");
837 ret = kvmppc_handle_halt(env);
838 break;
839 #ifdef CONFIG_PSERIES
840 case KVM_EXIT_PAPR_HCALL:
841 dprintf("handle PAPR hypercall\n");
842 run->papr_hcall.ret = spapr_hypercall(cpu,
843 run->papr_hcall.nr,
844 run->papr_hcall.args);
845 ret = 0;
846 break;
847 #endif
848 default:
849 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
850 ret = -1;
851 break;
852 }
853
854 return ret;
855 }
856
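/* Find 'field' at the start of a /proc/cpuinfo line and copy that line
 * into 'value'. Returns 0 on success, -1 if the field isn't present. */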
857 static int read_cpuinfo(const char *field, char *value, int len)
858 {
859 FILE *f;
860 int ret = -1;
861 int field_len = strlen(field);
862 char line[512];
863
864 f = fopen("/proc/cpuinfo", "r");
865 if (!f) {
866 return -1;
867 }
868
869 do {
870 if (!fgets(line, sizeof(line), f)) {
871 break;
872 }
873 if (!strncmp(line, field, field_len)) {
874 pstrcpy(value, len, line);
875 ret = 0;
876 break;
877 }
878 } while (*line);
879
880 fclose(f);
881
882 return ret;
883 }
884
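/* Parse the host timebase frequency from /proc/cpuinfo, falling back to
 * QEMU's default tick rate if it can't be determined. */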
885 uint32_t kvmppc_get_tbfreq(void)
886 {
887 char line[512];
888 char *ns;
889 uint32_t retval = get_ticks_per_sec();
890
891 if (read_cpuinfo("timebase", line, sizeof(line))) {
892 return retval;
893 }
894
895 if (!(ns = strchr(line, ':'))) {
896 return retval;
897 }
898
899 ns++;
900
901 retval = atoi(ns);
902 return retval;
903 }
904
905 /* Try to find a device tree node for a CPU with clock-frequency property */
906 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
907 {
908 struct dirent *dirp;
909 DIR *dp;
910
911 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
912 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
913 return -1;
914 }
915
916 buf[0] = '\0';
917 while ((dirp = readdir(dp)) != NULL) {
918 FILE *f;
919 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
920 dirp->d_name);
921 f = fopen(buf, "r");
922 if (f) {
923 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
924 fclose(f);
925 break;
926 }
927 buf[0] = '\0';
928 }
929 closedir(dp);
930 if (buf[0] == '\0') {
931 printf("Unknown host!\n");
932 return -1;
933 }
934
935 return 0;
936 }
937
938 /* Read a CPU node property from the host device tree that's a single
939 * integer (32-bit or 64-bit). Returns -1 if the property can't be
940 * found or opened, and 0 if the format isn't understood.
941 */
942 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
943 {
944 char buf[PATH_MAX];
945 union {
946 uint32_t v32;
947 uint64_t v64;
948 } u;
949 FILE *f;
950 int len;
951
952 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
953 return -1;
954 }
955
956 strncat(buf, "/", sizeof(buf) - strlen(buf) - 1);
957 strncat(buf, propname, sizeof(buf) - strlen(buf) - 1);
958
959 f = fopen(buf, "rb");
960 if (!f) {
961 return -1;
962 }
963
964 len = fread(&u, 1, sizeof(u), f);
965 fclose(f);
966 switch (len) {
967 case 4:
968 /* property is a 32-bit quantity */
969 return be32_to_cpu(u.v32);
970 case 8:
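/* property is a 64-bit quantity */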
971 return be64_to_cpu(u.v64);
972 }
973
974 return 0;
975 }
976
977 uint64_t kvmppc_get_clockfreq(void)
978 {
979 return kvmppc_read_int_cpu_dt("clock-frequency");
980 }
981
982 uint32_t kvmppc_get_vmx(void)
983 {
984 return kvmppc_read_int_cpu_dt("ibm,vmx");
985 }
986
987 uint32_t kvmppc_get_dfp(void)
988 {
989 return kvmppc_read_int_cpu_dt("ibm,dfp");
990 }
991
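/* Fill 'buf' with the hypercall instructions the guest should use, as
 * reported by KVM_PPC_GET_PVINFO; otherwise fall back to a stub that
 * simply returns -1. */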
992 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
993 {
994 PowerPCCPU *cpu = ppc_env_get_cpu(env);
995 CPUState *cs = CPU(cpu);
996 uint32_t *hc = (uint32_t*)buf;
997
998 struct kvm_ppc_pvinfo pvinfo;
999
1000 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1001 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
1002 memcpy(buf, pvinfo.hcall, buf_len);
1003
1004 return 0;
1005 }
1006
1007 /*
1008 * Fallback to always fail hypercalls:
1009 *
1010 * li r3, -1
1011 * nop
1012 * nop
1013 * nop
1014 */
1015
1016 hc[0] = 0x3860ffff;
1017 hc[1] = 0x60000000;
1018 hc[2] = 0x60000000;
1019 hc[3] = 0x60000000;
1020
1021 return 0;
1022 }
1023
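/* Switch this vCPU into PAPR (sPAPR guest) mode; aborts if the host
 * kernel lacks KVM_CAP_PPC_PAPR. */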
1024 void kvmppc_set_papr(PowerPCCPU *cpu)
1025 {
1026 CPUPPCState *env = &cpu->env;
1027 CPUState *cs = CPU(cpu);
1028 struct kvm_enable_cap cap = {};
1029 int ret;
1030
1031 cap.cap = KVM_CAP_PPC_PAPR;
1032 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1033
1034 if (ret) {
1035 cpu_abort(env, "This KVM version does not support PAPR\n");
1036 }
1037 }
1038
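/* Number of SMT threads per core reported by KVM (1 if the capability
 * is absent). */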
1039 int kvmppc_smt_threads(void)
1040 {
1041 return cap_ppc_smt ? cap_ppc_smt : 1;
1042 }
1043
1044 #ifdef TARGET_PPC64
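/* Allocate a contiguous Real Mode Area from the kernel when the
 * hardware requires one, map it at guest physical address 0 and return
 * its size (0 if no kernel RMA is needed, -1 on error). */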
1045 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1046 {
1047 void *rma;
1048 off_t size;
1049 int fd;
1050 struct kvm_allocate_rma ret;
1051 MemoryRegion *rma_region;
1052
1053 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1054 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1055 * not necessary on this hardware
1056 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1057 *
1058 * FIXME: We should allow the user to force contiguous RMA
1059 * allocation in the cap_ppc_rma==1 case.
1060 */
1061 if (cap_ppc_rma < 2) {
1062 return 0;
1063 }
1064
1065 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1066 if (fd < 0) {
1067 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1068 strerror(errno));
1069 return -1;
1070 }
1071
1072 size = MIN(ret.rma_size, 256ul << 20);
1073
1074 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1075 if (rma == MAP_FAILED) {
1076 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1077 return -1;
1078 }
1079
1080 rma_region = g_new(MemoryRegion, 1);
1081 memory_region_init_ram_ptr(rma_region, name, size, rma);
1082 vmstate_register_ram_global(rma_region);
1083 memory_region_add_subregion(sysmem, 0, rma_region);
1084
1085 return size;
1086 }
1087
1088 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1089 {
1090 if (cap_ppc_rma >= 2) {
1091 return current_size;
1092 }
1093 return MIN(current_size,
1094 getrampagesize() << (hash_shift - 7));
1095 }
1096 #endif
1097
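/* Create an in-kernel TCE table for the given LIOBN and mmap it into
 * QEMU. Returns the mapping (and the fd via *pfd), or NULL so the
 * caller falls back to a userspace table. */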
1098 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1099 {
1100 struct kvm_create_spapr_tce args = {
1101 .liobn = liobn,
1102 .window_size = window_size,
1103 };
1104 long len;
1105 int fd;
1106 void *table;
1107
1108 /* Must set fd to -1 so we don't try to munmap when called for
1109 * destroying the table, which the upper layers -will- do
1110 */
1111 *pfd = -1;
1112 if (!cap_spapr_tce) {
1113 return NULL;
1114 }
1115
1116 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1117 if (fd < 0) {
1118 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1119 liobn);
1120 return NULL;
1121 }
1122
1123 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1124 /* FIXME: round this up to page size */
1125
1126 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1127 if (table == MAP_FAILED) {
1128 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1129 liobn);
1130 close(fd);
1131 return NULL;
1132 }
1133
1134 *pfd = fd;
1135 return table;
1136 }
1137
1138 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1139 {
1140 long len;
1141
1142 if (fd < 0) {
1143 return -1;
1144 }
1145
1146 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1147 if ((munmap(table, len) < 0) ||
1148 (close(fd) < 0)) {
1149 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
1150 strerror(errno));
1151 /* Leak the table */
1152 }
1153
1154 return 0;
1155 }
1156
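/* Ask the kernel to allocate or reset the guest hash page table.
 * Returns the log2 size of a kernel-allocated HPT, or 0 when QEMU
 * should allocate the htab itself. */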
1157 int kvmppc_reset_htab(int shift_hint)
1158 {
1159 uint32_t shift = shift_hint;
1160
1161 if (!kvm_enabled()) {
1162 /* Full emulation, tell caller to allocate htab itself */
1163 return 0;
1164 }
1165 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1166 int ret;
1167 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1168 if (ret == -ENOTTY) {
1169 /* At least some versions of PR KVM advertise the
1170 * capability, but don't implement the ioctl(). Oops.
1171 * Return 0 so that we allocate the htab in qemu, as is
1172 * correct for PR. */
1173 return 0;
1174 } else if (ret < 0) {
1175 return ret;
1176 }
1177 return shift;
1178 }
1179
1180 /* We have a kernel that predates the htab reset calls. For PR
1181 * KVM, we need to allocate the htab ourselves; an HV KVM of
1182 * this era has already allocated a fixed 16MB hash table.
1183 * Kernels of this era have the GET_PVINFO capability
1184 * only on PR, so we use this hack to determine the right
1185 * answer. */
1186 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1187 /* PR - tell caller to allocate htab */
1188 return 0;
1189 } else {
1190 /* HV - assume 16MB kernel allocated htab */
1191 return 24;
1192 }
1193 }
1194
1195 static inline uint32_t mfpvr(void)
1196 {
1197 uint32_t pvr;
1198
1199 asm ("mfpvr %0"
1200 : "=r"(pvr));
1201 return pvr;
1202 }
1203
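/* Set or clear instruction-availability flags in a CPU spec word. */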
1204 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
1205 {
1206 if (on) {
1207 *word |= flags;
1208 } else {
1209 *word &= ~flags;
1210 }
1211 }
1212
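/* Build the CPU definition used for -cpu host: copy the spec matching
 * the host PVR, then adjust Altivec/VSX/DFP flags from what the host
 * device tree reports. */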
1213 const ppc_def_t *kvmppc_host_cpu_def(void)
1214 {
1215 uint32_t host_pvr = mfpvr();
1216 const ppc_def_t *base_spec;
1217 ppc_def_t *spec;
1218 uint32_t vmx = kvmppc_get_vmx();
1219 uint32_t dfp = kvmppc_get_dfp();
1220
1221 base_spec = ppc_find_by_pvr(host_pvr);
1222
1223 spec = g_malloc0(sizeof(*spec));
1224 memcpy(spec, base_spec, sizeof(*spec));
1225
1226 /* Now fix up the spec with information we can query from the host */
1227
1228 if (vmx != -1) {
1229 /* Only override when we know what the host supports */
1230 alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
1231 alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
1232 }
1233 if (dfp != -1) {
1234 /* Only override when we know what the host supports */
1235 alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
1236 }
1237
1238 return spec;
1239 }
1240
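/* Remap cpu_index so that each guest core's threads are spaced by the
 * host's SMT thread count. */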
1241 int kvmppc_fixup_cpu(CPUPPCState *env)
1242 {
1243 int smt;
1244
1245 /* Adjust cpu index for SMT */
1246 smt = kvmppc_smt_threads();
1247 env->cpu_index = (env->cpu_index / smp_threads) * smt
1248 + (env->cpu_index % smp_threads);
1249
1250 return 0;
1251 }
1252
1253
1254 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1255 {
1256 return true;
1257 }
1258
1259 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1260 {
1261 return 1;
1262 }
1263
1264 int kvm_arch_on_sigbus(int code, void *addr)
1265 {
1266 return 1;
1267 }