/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <dirent.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu-timer.h"
#include "sysemu.h"
#include "kvm.h"
#include "kvm_ppc.h"
#include "cpu.h"
#include "cpus.h"
#include "device_tree.h"
#include "hw/sysbus.h"
#include "hw/spapr.h"
#include "hw/spapr_vio.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_hior;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     ACKs it, goes to sleep and never gets notified that there's still an
 *     interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can ensure that we always reinject interrupts
 *     in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_env(void *env)
{
    qemu_cpu_kick(env);
}
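
/* Probe once, at init time, for the optional KVM capabilities this file
 * relies on; the cached flags gate the corresponding code paths below. */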
int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                "VM to stall at times!\n");
    }

    return 0;
}
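
/* Tell the kernel which PVR to model, via KVM_SET_SREGS; only meaningful
 * on Book S, where the sregs interface carries the PVR. */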
static int kvm_arch_sync_sregs(CPUPPCState *cenv)
{
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users into thinking they
           can run BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(CPUPPCState *env)
{
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    struct kvm_enable_cap encap = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    encap.cap = KVM_CAP_SW_TLB;
    encap.args[0] = (uintptr_t)&cfg;

    ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}

#if defined(TARGET_PPC64)
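/* Hand-build SMMU info on kernels that lack the KVM_PPC_GET_SMMU_INFO
 * ioctl; the assumptions behind the guess are spelled out in the comment
 * inside. */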
static void kvm_get_fallback_smmu_info(CPUPPCState *env,
                                       struct kvm_ppc_smmu_info *info)
{
    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 */
        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}
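
/* Ask the kernel for its view of the host MMU; fall back to the local
 * guess above when KVM_PPC_GET_SMMU_INFO is unavailable. */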
static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
{
    int ret;

    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(env, info);
}
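
/* Work out the page size backing guest RAM: the normal host page size for
 * anonymous memory, or the filesystem block size when -mem-path points at
 * hugetlbfs. */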
static long getrampagesize(void)
{
    struct statfs fs;
    int ret;

    if (!mem_path) {
        /* guest RAM is backed by normal anonymous pages */
        return getpagesize();
    }

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}
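
/* A page size is only usable when KVM requires real-mode-sized pages
 * (HV KVM) if it fits inside the backing store's page size. */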
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}
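
/* Filter the page/segment size list advertised by the kernel down to what
 * the RAM backing store can honour, and store the result in env->sps. */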
static void kvm_fixup_page_sizes(CPUPPCState *env)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(env, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
        env->mmu_model |= POWERPC_MMU_1TSEG;
    } else {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(CPUPPCState *env)
{
}

#endif /* !defined (TARGET_PPC64) */

int kvm_arch_init_vcpu(CPUPPCState *cenv)
{
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cenv);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cenv);
    if (ret) {
        return ret;
    }

    idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_env, cenv);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cenv);
        break;
    default:
        break;
    }

    return ret;
}

void kvm_arch_reset_vcpu(CPUPPCState *env)
{
}
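
/* Push QEMU's copy of the shared software TLB back to KVM by marking
 * every entry dirty; called when QEMU has modified the TLB contents. */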
static void kvm_sw_tlb_put(CPUPPCState *env)
{
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(env, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}
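
/* Copy QEMU's CPU state into the kernel: GPRs and core SPRs on every sync,
 * plus the segment registers, SLB, BATs and HIOR when 'level' asks for a
 * full reset-state synchronisation. */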
int kvm_arch_put_registers(CPUPPCState *env, int level)
{
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = env->xer;
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(env);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[1][i] << 32)
                | env->DBAT[0][i];
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[1][i] << 32)
                | env->IBAT[0][i];
        }

        ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
        if (ret) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        uint64_t hior = env->spr[SPR_HIOR];
        struct kvm_one_reg reg = {
            .id = KVM_REG_PPC_HIOR,
            .addr = (uintptr_t) &hior,
        };

        ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    return ret;
}
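
/* Mirror of kvm_arch_put_registers: pull the register file out of the
 * kernel, then the BookE or Book S special registers depending on which
 * capabilities the kernel advertised. */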
int kvm_arch_get_registers(CPUPPCState *env)
{
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    env->xer = regs.xer;
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        ppc_store_sdr1(env, sregs.u.s.sdr1);

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
                          sregs.u.s.ppc64.slb[i].slbv);
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    return 0;
}
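
/* Raise or lower the external interrupt pin through the KVM_INTERRUPT
 * ioctl, provided the kernel can track interrupt levels. */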
int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);

    return 0;
}

#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
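
/* Called before entering the guest. When the kernel can't track interrupt
 * levels itself, inject any pending external interrupt here and rearm the
 * wakeup timer as a workaround for the race described at the top of this
 * file. */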
void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
{
    int r;
    unsigned irq;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1<<PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        dprintf("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", env->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
                       (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */
}

void kvm_arch_post_run(CPUPPCState *env, struct kvm_run *run)
{
}

int kvm_arch_process_async_events(CPUPPCState *env)
{
    return env->halted;
}
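
/* The guest executed a halting instruction: idle the vCPU unless an
 * interrupt is already pending and external interrupts are enabled. */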
static int kvmppc_handle_halt(CPUPPCState *env)
{
    if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        env->halted = 1;
        env->exception_index = EXCP_HLT;
    }

    return 0;
}

/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}
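
/* Dispatch the KVM exits that the generic code hands back to the target:
 * DCR accesses, halts and, on pseries, PAPR hypercalls. */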
int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run)
{
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            dprintf("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            dprintf("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        dprintf("handle halt\n");
        ret = kvmppc_handle_halt(env);
        break;
#ifdef CONFIG_PSERIES
    case KVM_EXIT_PAPR_HCALL:
        dprintf("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(env, run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}
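
/* Find the line starting with 'field' in /proc/cpuinfo and copy it into
 * 'value'; returns 0 on success, -1 when the field isn't present. */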
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            strncpy(value, line, len);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}
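
/* Parse the host timebase frequency out of /proc/cpuinfo, falling back to
 * QEMU's tick rate when it can't be determined. */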
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}

/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}

/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit). Returns -1 if the property can't be
 * found or opened, and 0 if the format isn't understood. */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX];
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    strncat(buf, "/", sizeof(buf) - strlen(buf));
    strncat(buf, propname, sizeof(buf) - strlen(buf));

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}

uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
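
/* Fetch the kernel's preferred hypercall instruction sequence, or install
 * a fallback sequence that makes every hypercall fail with -1. */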
int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t*)buf;

    struct kvm_ppc_pvinfo pvinfo;

    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);

        return 0;
    }

    /*
     * Fallback to always fail hypercalls:
     *
     *     li r3, -1
     *     nop
     *     nop
     *     nop
     */

    hc[0] = 0x3860ffff;
    hc[1] = 0x60000000;
    hc[2] = 0x60000000;
    hc[3] = 0x60000000;

    return 0;
}

void kvmppc_set_papr(CPUPPCState *env)
{
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_PAPR;
    ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);

    if (ret) {
        cpu_abort(env, "This KVM version does not support PAPR\n");
    }
}

int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}
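
/* Allocate a contiguous Real Mode Area from the kernel, but only when the
 * host actually requires one (cap_ppc_rma == 2); returns its size, or 0
 * when none is required. */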
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, name, size, rma);
    vmstate_register_ram_global(rma_region);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}
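
/* Create an in-kernel TCE (DMA translation) table for the given LIOBN and
 * map it into QEMU's address space; returns NULL when kernel acceleration
 * is unavailable so the caller can fall back to a userspace table. */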
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}
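
/* Unmap and close a TCE table previously set up by
 * kvmppc_create_spapr_tce(). */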
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
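
/* Read the host's Processor Version Register. */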
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}
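
/* Build the CPU spec used for -cpu host: start from the built-in spec that
 * matches the host PVR, then override the Altivec/VSX and DFP flags with
 * what the host device tree reports. */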
const ppc_def_t *kvmppc_host_cpu_def(void)
{
    uint32_t host_pvr = mfpvr();
    const ppc_def_t *base_spec;
    ppc_def_t *spec;
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();

    base_spec = ppc_find_by_pvr(host_pvr);

    spec = g_malloc0(sizeof(*spec));
    memcpy(spec, base_spec, sizeof(*spec));

    /* Now fix up the spec with information we can query from the host */

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
    }

    return spec;
}
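
/* Renumber the CPU index so that QEMU's thread numbering lines up with the
 * kernel's SMT-aware vcpu id allocation. */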
int kvmppc_fixup_cpu(CPUPPCState *env)
{
    int smt;

    /* Adjust cpu index for SMT */
    smt = kvmppc_smt_threads();
    env->cpu_index = (env->cpu_index / smp_threads) * smt
                     + (env->cpu_index % smp_threads);

    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUPPCState *env)
{
    return true;
}

int kvm_arch_on_sigbus_vcpu(CPUPPCState *env, int code, void *addr)
{
    return 1;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}