]> git.proxmox.com Git - qemu.git/blame - target-ppc/kvm.c
os-posix: avoid buffer overrun
[qemu.git] / target-ppc / kvm.c
CommitLineData
d76d1650
AJ
1/*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
90dc8812 5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
d76d1650
AJ
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
eadaada1 17#include <dirent.h>
d76d1650
AJ
18#include <sys/types.h>
19#include <sys/ioctl.h>
20#include <sys/mman.h>
4656e1f0 21#include <sys/vfs.h>
d76d1650
AJ
22
23#include <linux/kvm.h>
24
25#include "qemu-common.h"
26#include "qemu-timer.h"
27#include "sysemu.h"
28#include "kvm.h"
29#include "kvm_ppc.h"
30#include "cpu.h"
12b1143b 31#include "cpus.h"
d76d1650 32#include "device_tree.h"
0f5cb298 33#include "hw/sysbus.h"
e97c3636 34#include "hw/spapr.h"
d76d1650 35
f61b4bed
AG
36#include "hw/sysbus.h"
37#include "hw/spapr.h"
38#include "hw/spapr_vio.h"
39
d76d1650
AJ
40//#define DEBUG_KVM
41
42#ifdef DEBUG_KVM
43#define dprintf(fmt, ...) \
44 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
45#else
46#define dprintf(fmt, ...) \
47 do { } while (0)
48#endif
49
eadaada1
AG
50#define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
51
94a8d39a
JK
52const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
53 KVM_CAP_LAST_INFO
54};
55
fc87e185
AG
56static int cap_interrupt_unset = false;
57static int cap_interrupt_level = false;
90dc8812 58static int cap_segstate;
90dc8812 59static int cap_booke_sregs;
e97c3636 60static int cap_ppc_smt;
354ac20a 61static int cap_ppc_rma;
0f5cb298 62static int cap_spapr_tce;
fc87e185 63
c821c2bd
AG
64/* XXX We have a race condition where we actually have a level triggered
65 * interrupt, but the infrastructure can't expose that yet, so the guest
66 * takes but ignores it, goes to sleep and never gets notified that there's
67 * still an interrupt pending.
c6a94ba5 68 *
c821c2bd
AG
69 * As a quick workaround, let's just wake up again 20 ms after we injected
70 * an interrupt. That way we can assure that we're always reinjecting
71 * interrupts in case the guest swallowed them.
c6a94ba5
AG
72 */
73static QEMUTimer *idle_timer;
74
c821c2bd 75static void kvm_kick_env(void *env)
c6a94ba5 76{
c821c2bd 77 qemu_cpu_kick(env);
c6a94ba5
AG
78}
79
/*
 * One-time PPC KVM init: probe the optional kernel capabilities and cache
 * the results in the file-scope cap_* flags used throughout this file.
 * Always returns 0; a missing capability only degrades behavior.
 */
int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);

    /* Without level-triggered irq support we fall back to the wakeup-timer
     * workaround (see idle_timer above), which can stall the guest. */
    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                "VM to stall at times!\n");
    }

    return 0;
}
97
1328c2bf 98static int kvm_arch_sync_sregs(CPUPPCState *cenv)
d76d1650 99{
861bbc80 100 struct kvm_sregs sregs;
5666ca4a
SW
101 int ret;
102
103 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
64e07be5
AG
104 /* What we're really trying to say is "if we're on BookE, we use
105 the native PVR for now". This is the only sane way to check
106 it though, so we potentially confuse users that they can run
107 BookE guests on BookS. Let's hope nobody dares enough :) */
5666ca4a
SW
108 return 0;
109 } else {
90dc8812 110 if (!cap_segstate) {
64e07be5
AG
111 fprintf(stderr, "kvm error: missing PVR setting capability\n");
112 return -ENOSYS;
5666ca4a 113 }
5666ca4a
SW
114 }
115
116 ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
117 if (ret) {
118 return ret;
119 }
861bbc80
AG
120
121 sregs.pvr = cenv->spr[SPR_PVR];
5666ca4a
SW
122 return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
123}
124
/* Set up a shared TLB array with KVM.
 *
 * Hands QEMU's BookE 2.06 MAS-format TLB array to the kernel via
 * KVM_ENABLE_CAP(KVM_CAP_SW_TLB), so both sides operate on the same
 * storage. On success sets env->kvm_sw_tlb and marks the TLB dirty so the
 * first put pushes the full contents. Returns 0 (also when the capability
 * is absent) or a negative errno from the ioctl.
 */
static int kvm_booke206_tlb_init(CPUPPCState *env)
{
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    struct kvm_enable_cap encap = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    /* Describe each TLB's geometry to the kernel */
    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    /* The shared array is only valid if both sides agree on entry layout */
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    encap.cap = KVM_CAP_SW_TLB;
    encap.args[0] = (uintptr_t)&cfg;

    ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}
170
4656e1f0
BH
171
172#if defined(TARGET_PPC64)
/* Synthesize an smmu_info for kernels that predate KVM_PPC_GET_SMMU_INFO,
 * based on whether we appear to be running under PR or HV KVM. */
static void kvm_get_fallback_smmu_info(CPUPPCState *env,
                                       struct kvm_ppc_smmu_info *info)
{
    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteritics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows supports for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertize 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 */
        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}
256
257static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
258{
259 int ret;
260
261 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
262 ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
263 if (ret == 0) {
264 return;
265 }
266 }
267
268 kvm_get_fallback_smmu_info(env, info);
269}
270
271static long getrampagesize(void)
272{
273 struct statfs fs;
274 int ret;
275
276 if (!mem_path) {
277 /* guest RAM is backed by normal anonymous pages */
278 return getpagesize();
279 }
280
281 do {
282 ret = statfs(mem_path, &fs);
283 } while (ret != 0 && errno == EINTR);
284
285 if (ret != 0) {
286 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
287 strerror(errno));
288 exit(1);
289 }
290
291#define HUGETLBFS_MAGIC 0x958458f6
292
293 if (fs.f_type != HUGETLBFS_MAGIC) {
294 /* Explicit mempath, but it's ordinary pages */
295 return getpagesize();
296 }
297
298 /* It's hugepage, return the huge page size */
299 return fs.f_bsize;
300}
301
302static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
303{
304 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
305 return true;
306 }
307
308 return (1ul << shift) <= rampgsize;
309}
310
/*
 * Restrict env->sps to the segment/page-size combinations the host kernel
 * and RAM backing actually support, and sync SLB size / 1T-segment flag.
 * Only applies to 64-bit server MMUs; the kernel is queried once and the
 * result cached in function-local statics.
 */
static void kvm_fixup_page_sizes(CPUPPCState *env)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(env, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form: ik/jk index the kernel arrays, iq/jq the
     * (possibly shorter) QEMU arrays; invalid sizes are skipped. */
    memset(&env->sps, 0, sizeof(env->sps));

    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
        env->mmu_model |= POWERPC_MMU_1TSEG;
    } else {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
366#else /* defined (TARGET_PPC64) */
367
/* Page-size fixups are only needed for 64-bit server targets. */
static inline void kvm_fixup_page_sizes(CPUPPCState *env)
{
}
371
372#endif /* !defined (TARGET_PPC64) */
373
1328c2bf 374int kvm_arch_init_vcpu(CPUPPCState *cenv)
5666ca4a
SW
375{
376 int ret;
377
4656e1f0
BH
378 /* Gather server mmu info from KVM and update the CPU state */
379 kvm_fixup_page_sizes(cenv);
380
381 /* Synchronize sregs with kvm */
5666ca4a
SW
382 ret = kvm_arch_sync_sregs(cenv);
383 if (ret) {
384 return ret;
385 }
861bbc80 386
74475455 387 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_env, cenv);
c821c2bd 388
93dd5e85
SW
389 /* Some targets support access to KVM's guest TLB. */
390 switch (cenv->mmu_model) {
391 case POWERPC_MMU_BOOKE206:
392 ret = kvm_booke206_tlb_init(cenv);
393 break;
394 default:
395 break;
396 }
397
861bbc80 398 return ret;
d76d1650
AJ
399}
400
/* Nothing PPC-specific to do on vcpu reset. */
void kvm_arch_reset_vcpu(CPUPPCState *env)
{
}
404
1328c2bf 405static void kvm_sw_tlb_put(CPUPPCState *env)
93dd5e85
SW
406{
407 struct kvm_dirty_tlb dirty_tlb;
408 unsigned char *bitmap;
409 int ret;
410
411 if (!env->kvm_sw_tlb) {
412 return;
413 }
414
415 bitmap = g_malloc((env->nb_tlb + 7) / 8);
416 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
417
418 dirty_tlb.bitmap = (uintptr_t)bitmap;
419 dirty_tlb.num_dirty = env->nb_tlb;
420
421 ret = kvm_vcpu_ioctl(env, KVM_DIRTY_TLB, &dirty_tlb);
422 if (ret) {
423 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
424 __func__, strerror(-ret));
425 }
426
427 g_free(bitmap);
428}
429
1328c2bf 430int kvm_arch_put_registers(CPUPPCState *env, int level)
d76d1650
AJ
431{
432 struct kvm_regs regs;
433 int ret;
434 int i;
435
436 ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
437 if (ret < 0)
438 return ret;
439
440 regs.ctr = env->ctr;
441 regs.lr = env->lr;
442 regs.xer = env->xer;
443 regs.msr = env->msr;
444 regs.pc = env->nip;
445
446 regs.srr0 = env->spr[SPR_SRR0];
447 regs.srr1 = env->spr[SPR_SRR1];
448
449 regs.sprg0 = env->spr[SPR_SPRG0];
450 regs.sprg1 = env->spr[SPR_SPRG1];
451 regs.sprg2 = env->spr[SPR_SPRG2];
452 regs.sprg3 = env->spr[SPR_SPRG3];
453 regs.sprg4 = env->spr[SPR_SPRG4];
454 regs.sprg5 = env->spr[SPR_SPRG5];
455 regs.sprg6 = env->spr[SPR_SPRG6];
456 regs.sprg7 = env->spr[SPR_SPRG7];
457
90dc8812
SW
458 regs.pid = env->spr[SPR_BOOKE_PID];
459
d76d1650
AJ
460 for (i = 0;i < 32; i++)
461 regs.gpr[i] = env->gpr[i];
462
463 ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
464 if (ret < 0)
465 return ret;
466
93dd5e85
SW
467 if (env->tlb_dirty) {
468 kvm_sw_tlb_put(env);
469 env->tlb_dirty = false;
470 }
471
d76d1650
AJ
472 return ret;
473}
474
/*
 * Read the vcpu state out of the kernel into env: general regs and CR
 * always, then BookE sregs and/or BookS segment state depending on which
 * capabilities the kernel advertised at init time.
 * Returns 0 on success or a negative errno from the first failing ioctl.
 */
int kvm_arch_get_registers(CPUPPCState *env)
{
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
    if (ret < 0)
        return ret;

    /* Unpack the 32-bit CR into the eight 4-bit crf fields */
    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    env->xer = regs.xer;
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0;i < 32; i++)
        env->gpr[i] = regs.gpr[i];

    /* BookE: each feature bit in sregs.u.e.features gates a group of SPRs */
    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            /* 64-bit timebase split across TBL/TBU */
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            /* MAS3/MAS7 are packed into the single 64-bit mas7_3 */
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        /* Freescale implementation-specific SPRs */
        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    /* BookS: SDR1, SLB, segment registers and BATs */
    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        ppc_store_sdr1(env, sregs.u.s.sdr1);

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
                          sregs.u.s.ppc64.slb[i].slbv);
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs (each 64-bit kernel value splits into lo/hi halves) */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    return 0;
}
651
1328c2bf 652int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
fc87e185
AG
653{
654 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
655
656 if (irq != PPC_INTERRUPT_EXT) {
657 return 0;
658 }
659
660 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
661 return 0;
662 }
663
664 kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);
665
666 return 0;
667}
668
16415335
AG
669#if defined(TARGET_PPCEMB)
670#define PPC_INPUT_INT PPC40x_INPUT_INT
671#elif defined(TARGET_PPC64)
672#define PPC_INPUT_INT PPC970_INPUT_INT
673#else
674#define PPC_INPUT_INT PPC6xx_INPUT_INT
675#endif
676
/*
 * Called just before entering the guest: on kernels without level-irq
 * support, manually inject a pending external interrupt and arm a 20ms
 * wakeup timer as a workaround for missed level-triggered interrupts
 * (see the comment at idle_timer above).
 */
void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
{
    int r;
    unsigned irq;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1<<PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        dprintf("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
        if (r < 0)
            printf("cpu %d fail inject %x\n", env->cpu_index, irq);

        /* Always wake up soon in case the interrupt was level based */
        qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
                       (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */
}
709
/* Nothing PPC-specific to do after returning from the guest. */
void kvm_arch_post_run(CPUPPCState *env, struct kvm_run *run)
{
}
713
/* Report whether the vcpu is halted so generic KVM code can skip running it. */
int kvm_arch_process_async_events(CPUPPCState *env)
{
    return env->halted;
}
718
1328c2bf 719static int kvmppc_handle_halt(CPUPPCState *env)
d76d1650
AJ
720{
721 if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
722 env->halted = 1;
723 env->exception_index = EXCP_HLT;
724 }
725
bb4ea393 726 return 0;
d76d1650
AJ
727}
728
729/* map dcr access to existing qemu dcr emulation */
1328c2bf 730static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
d76d1650
AJ
731{
732 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
733 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
734
bb4ea393 735 return 0;
d76d1650
AJ
736}
737
1328c2bf 738static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
d76d1650
AJ
739{
740 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
741 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
742
bb4ea393 743 return 0;
d76d1650
AJ
744}
745
/*
 * Dispatch a KVM exit reason to its PPC handler: DCR accesses, halt and
 * (with pSeries support) PAPR hypercalls. Returns the handler's result,
 * or -1 for exit reasons we don't understand.
 */
int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run)
{
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            dprintf("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            dprintf("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        dprintf("handle halt\n");
        ret = kvmppc_handle_halt(env);
        break;
#ifdef CONFIG_PSERIES
    case KVM_EXIT_PAPR_HCALL:
        dprintf("handle PAPR hypercall\n");
        /* The hypercall result goes back to the guest via the run struct */
        run->papr_hcall.ret = spapr_hypercall(env, run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}
780
dc333cd6
AG
/*
 * Scan /proc/cpuinfo for the first line starting with 'field' and copy
 * that line into 'value' (truncated to len-1 chars, always NUL-terminated).
 * Returns 0 on a match, -1 if the file can't be opened or nothing matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            /* Bug fix: the original strncpy() left 'value' unterminated
             * whenever the matched line was at least 'len' bytes long;
             * snprintf always NUL-terminates. */
            snprintf(value, len, "%s", line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}
808
/*
 * Host timebase frequency parsed from the /proc/cpuinfo "timebase" line;
 * falls back to the tick rate if the line is absent or malformed.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *colon;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    colon = strchr(line, ':');
    if (!colon) {
        return retval;
    }

    return atoi(colon + 1);
}
4513d923 828
eadaada1
AG
829/* Try to find a device tree node for a CPU with clock-frequency property */
830static int kvmppc_find_cpu_dt(char *buf, int buf_len)
831{
832 struct dirent *dirp;
833 DIR *dp;
834
835 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
836 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
837 return -1;
838 }
839
840 buf[0] = '\0';
841 while ((dirp = readdir(dp)) != NULL) {
842 FILE *f;
843 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
844 dirp->d_name);
845 f = fopen(buf, "r");
846 if (f) {
847 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
848 fclose(f);
849 break;
850 }
851 buf[0] = '\0';
852 }
853 closedir(dp);
854 if (buf[0] == '\0') {
855 printf("Unknown host!\n");
856 return -1;
857 }
858
859 return 0;
860}
861
9bc884b7
DG
862/* Read a CPU node property from the host device tree that's a single
863 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
864 * (can't find or open the property, or doesn't understand the
865 * format) */
866static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
eadaada1 867{
9bc884b7
DG
868 char buf[PATH_MAX];
869 union {
870 uint32_t v32;
871 uint64_t v64;
872 } u;
eadaada1
AG
873 FILE *f;
874 int len;
875
876 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
9bc884b7 877 return -1;
eadaada1
AG
878 }
879
9bc884b7
DG
880 strncat(buf, "/", sizeof(buf) - strlen(buf));
881 strncat(buf, propname, sizeof(buf) - strlen(buf));
eadaada1
AG
882
883 f = fopen(buf, "rb");
884 if (!f) {
885 return -1;
886 }
887
9bc884b7 888 len = fread(&u, 1, sizeof(u), f);
eadaada1
AG
889 fclose(f);
890 switch (len) {
9bc884b7
DG
891 case 4:
892 /* property is a 32-bit quantity */
893 return be32_to_cpu(u.v32);
894 case 8:
895 return be64_to_cpu(u.v64);
eadaada1
AG
896 }
897
898 return 0;
899}
900
9bc884b7
DG
/* Host CPU clock frequency from the device tree (see kvmppc_read_int_cpu_dt
 * for the failure return values). */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
905
6659394f
DG
/* Host "ibm,vmx" device tree property (Altivec/VSX level); see
 * kvmppc_read_int_cpu_dt for failure values. */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}
910
/* Host "ibm,dfp" device tree property (decimal floating point support);
 * see kvmppc_read_int_cpu_dt for failure values. */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
915
1328c2bf 916int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
45024f09
AG
917{
918 uint32_t *hc = (uint32_t*)buf;
919
45024f09
AG
920 struct kvm_ppc_pvinfo pvinfo;
921
922 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
923 !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
924 memcpy(buf, pvinfo.hcall, buf_len);
925
926 return 0;
927 }
45024f09
AG
928
929 /*
930 * Fallback to always fail hypercalls:
931 *
932 * li r3, -1
933 * nop
934 * nop
935 * nop
936 */
937
938 hc[0] = 0x3860ffff;
939 hc[1] = 0x60000000;
940 hc[2] = 0x60000000;
941 hc[3] = 0x60000000;
942
943 return 0;
944}
945
/*
 * Put the vcpu into PAPR mode: enable KVM_CAP_PPC_PAPR, set HIOR and push
 * SDR1 so the kernel can find the hash table. Aborts the CPU if the kernel
 * lacks PAPR support; a failed HIOR set is only warned about (old HV-only
 * kernels can still work).
 */
void kvmppc_set_papr(CPUPPCState *env)
{
    struct kvm_enable_cap cap = {};
    struct kvm_one_reg reg = {};
    struct kvm_sregs sregs = {};
    int ret;
    uint64_t hior = env->spr[SPR_HIOR];

    cap.cap = KVM_CAP_PPC_PAPR;
    ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);

    if (ret) {
        goto fail;
    }

    /*
     * XXX We set HIOR here. It really should be a qdev property of
     *     the CPU node, but we don't have CPUs converted to qdev yet.
     *
     *     Once we have qdev CPUs, move HIOR to a qdev property and
     *     remove this chunk.
     */
    reg.id = KVM_REG_PPC_HIOR;
    reg.addr = (uintptr_t)&hior;
    ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
    if (ret) {
        fprintf(stderr, "Couldn't set HIOR. Maybe you're running an old \n"
                "kernel with support for HV KVM but no PAPR PR \n"
                "KVM in which case things will work. If they don't \n"
                "please update your host kernel!\n");
    }

    /* Set SDR1 so kernel space finds the HTAB */
    ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
    if (ret) {
        goto fail;
    }

    sregs.u.s.sdr1 = env->spr[SPR_SDR1];

    ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
    if (ret) {
        goto fail;
    }

    return;

fail:
    cpu_abort(env, "This KVM version does not support PAPR\n");
}
996
e97c3636
DG
997int kvmppc_smt_threads(void)
998{
999 return cap_ppc_smt ? cap_ppc_smt : 1;
1000}
1001
354ac20a
DG
1002off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1003{
1004 void *rma;
1005 off_t size;
1006 int fd;
1007 struct kvm_allocate_rma ret;
1008 MemoryRegion *rma_region;
1009
1010 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1011 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1012 * not necessary on this hardware
1013 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1014 *
1015 * FIXME: We should allow the user to force contiguous RMA
1016 * allocation in the cap_ppc_rma==1 case.
1017 */
1018 if (cap_ppc_rma < 2) {
1019 return 0;
1020 }
1021
1022 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1023 if (fd < 0) {
1024 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1025 strerror(errno));
1026 return -1;
1027 }
1028
1029 size = MIN(ret.rma_size, 256ul << 20);
1030
1031 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1032 if (rma == MAP_FAILED) {
1033 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1034 return -1;
1035 };
1036
1037 rma_region = g_new(MemoryRegion, 1);
6148b23d
AK
1038 memory_region_init_ram_ptr(rma_region, name, size, rma);
1039 vmstate_register_ram_global(rma_region);
354ac20a
DG
1040 memory_region_add_subregion(sysmem, 0, rma_region);
1041
1042 return size;
1043}
1044
0f5cb298
DG
/*
 * Create a kernel-managed TCE (DMA translation) table for the given liobn
 * and mmap it into QEMU. On success returns the mapped table and stores
 * its fd in *pfd; on any failure (including missing capability) returns
 * NULL with *pfd set to -1.
 */
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    /* One TCE entry per IOMMU page in the window */
    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}
1084
1085int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1086{
1087 long len;
1088
1089 if (fd < 0) {
1090 return -1;
1091 }
1092
ad0ebb91 1093 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
0f5cb298
DG
1094 if ((munmap(table, len) < 0) ||
1095 (close(fd) < 0)) {
b5aec396
DG
1096 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1097 strerror(errno));
0f5cb298
DG
1098 /* Leak the table */
1099 }
1100
1101 return 0;
1102}
1103
a1e98583
DG
/* Read the host's Processor Version Register (PPC-only instruction). */
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}
1112
a7342588
DG
/* Set (on == true) or clear (on == false) the given flag bits in *word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
1121
a1e98583
DG
/*
 * Build a CPU spec describing the host: start from the built-in spec that
 * matches the host PVR, then override Altivec/VSX and DFP availability
 * with what the host device tree reports.
 * Returns a heap-allocated copy owned by the caller.
 * NOTE(review): assumes ppc_find_by_pvr(host_pvr) never returns NULL for
 * the host's own PVR — confirm, otherwise the memcpy dereferences NULL.
 */
const ppc_def_t *kvmppc_host_cpu_def(void)
{
    uint32_t host_pvr = mfpvr();
    const ppc_def_t *base_spec;
    ppc_def_t *spec;
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();

    base_spec = ppc_find_by_pvr(host_pvr);

    spec = g_malloc0(sizeof(*spec));
    memcpy(spec, base_spec, sizeof(*spec));

    /* Now fix up the spec with information we can query from the host */

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
    }

    return spec;
}
1149
12b1143b
DG
1150int kvmppc_fixup_cpu(CPUPPCState *env)
1151{
1152 int smt;
1153
1154 /* Adjust cpu index for SMT */
1155 smt = kvmppc_smt_threads();
1156 env->cpu_index = (env->cpu_index / smp_threads) * smt
1157 + (env->cpu_index % smp_threads);
1158
1159 return 0;
1160}
1161
1162
/* Always stop the VM on an in-kernel emulation failure. */
bool kvm_arch_stop_on_emulation_error(CPUPPCState *env)
{
    return true;
}
a1b87fe0 1167
/* SIGBUS on a vcpu thread: not handled on PPC (non-zero = unhandled). */
int kvm_arch_on_sigbus_vcpu(CPUPPCState *env, int code, void *addr)
{
    return 1;
}
1172
/* SIGBUS outside vcpu context: not handled on PPC (non-zero = unhandled). */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}