/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <dirent.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
#include "cpu.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "mmu-hash64.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_hior;
static int cap_one_reg;

c821c2bd
AG
67/* XXX We have a race condition where we actually have a level triggered
68 * interrupt, but the infrastructure can't expose that yet, so the guest
69 * takes but ignores it, goes to sleep and never gets notified that there's
70 * still an interrupt pending.
c6a94ba5 71 *
c821c2bd
AG
72 * As a quick workaround, let's just wake up again 20 ms after we injected
73 * an interrupt. That way we can assure that we're always reinjecting
74 * interrupts in case the guest swallowed them.
c6a94ba5
AG
75 */
76static QEMUTimer *idle_timer;
77
d5a68146 78static void kvm_kick_cpu(void *opaque)
c6a94ba5 79{
d5a68146 80 PowerPCCPU *cpu = opaque;
d5a68146 81
c08d7424 82 qemu_cpu_kick(CPU(cpu));
c6a94ba5
AG
83}
84
static int kvm_ppc_register_host_cpu_type(void);

int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}

static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, even if it may mislead users into thinking they can
           run BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    struct kvm_enable_cap encap = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    encap.cap = KVM_CAP_SW_TLB;
    encap.args[0] = (uintptr_t)&cfg;

    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}

#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 */
        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}

static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}

static long getrampagesize(void)
{
    struct statfs fs;
    int ret;

    if (!mem_path) {
        /* guest RAM is backed by normal anonymous pages */
        return getpagesize();
    }

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugetlbfs, so return the huge page size */
    return fs.f_bsize;
}

static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}

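/* Worked example for the check above: with KVM_PPC_PAGE_SIZES_REAL set
 * (HV KVM) and guest RAM backed by ordinary 64 KiB pages, a 16 MiB page
 * size (shift 24) is rejected because (1ul << 24) > 65536, while 4 KiB
 * and 64 KiB (shifts 12 and 16) pass. Without the REAL flag (PR KVM),
 * every advertised size is accepted regardless of the backing store.
 */
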
static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
        env->mmu_model |= POWERPC_MMU_1TSEG;
    } else {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

#endif /* !defined (TARGET_PPC64) */

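/* Illustrative consequence of the filtering in kvm_fixup_page_sizes()
 * (not an exhaustive matrix): under HV KVM with 4 KiB anonymous pages
 * backing guest RAM, only the 4 KiB entries survive
 * kvm_valid_page_size(), so env->sps ends up with a single segment size
 * and the 64 KiB/16 MiB encodings reported by the kernel are dropped.
 * Backing RAM with 16 MiB hugetlbfs pages keeps all three.
 */
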
unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return cpu->cpu_index;
}

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        return ret;
    }

    idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);
        break;
    default:
        break;
    }

    return ret;
}

void kvm_arch_reset_vcpu(CPUState *cpu)
{
}

static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}

static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        fprintf(stderr, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
                spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}

static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        fprintf(stderr, "Warning: Unable to set SPR %d to KVM: %s\n",
                spr, strerror(errno));
    }
}

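/* Usage sketch for the two helpers above: the ONE_REG id encodes the
 * register's width in KVM_REG_SIZE_MASK, so a caller only passes the id
 * and the SPR index it maps to, e.g.
 *
 *     kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 *
 * as done for cap_hior below. Ids of any width other than 32 or 64 bits
 * abort(), since no SPR currently needs them.
 */
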
static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            /* Beware. We have to swap upper and lower bits here */
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
                | env->DBAT[1][i];
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
                | env->IBAT[1][i];
        }

        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
        if (ret) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        /* We deliberately ignore errors here: for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }
    }

    return ret;
}

int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    kvm_get_fp(cs);

    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        ppc_store_sdr1(env, sregs.u.s.sdr1);

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
                          sregs.u.s.ppc64.slb[i].slbv);
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        /* We deliberately ignore errors here: for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_get_one_spr(cs, id, i);
            }
        }
    }

    return 0;
}

int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}

#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1<<PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        dprintf("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based;
         * get_ticks_per_sec() / 50 is the 20 ms workaround described at
         * the top of this file. */
        qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
                       (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */
}

void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
}

int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}

static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        env->exception_index = EXCP_HLT;
    }

    return 0;
}

/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            dprintf("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            dprintf("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        dprintf("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        dprintf("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        dprintf("handle epr\n");
        run->epr.epr = ldl_phys(env->mpic_iack);
        ret = 0;
        break;
    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}

static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}

uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}

/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}

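/* On success, buf holds the CPU node path without a trailing property
 * name, e.g. (hypothetical host node name)
 * "/proc/device-tree/cpus/PowerPC,POWER7@0"; callers such as
 * kvmppc_read_int_cpu_dt() below then append "/<propname>" to it.
 */
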
/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit). Returns -1 if the node or property can't
 * be found or opened, and 0 if the property's format isn't understood. */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX];
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    strncat(buf, "/", sizeof(buf) - strlen(buf));
    strncat(buf, propname, sizeof(buf) - strlen(buf));

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        /* property is a 64-bit quantity */
        return be64_to_cpu(u.v64);
    }

    return 0;
}

uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}

static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
{
    PowerPCCPU *cpu = ppc_env_get_cpu(env);
    CPUState *cs = CPU(cpu);

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
        return 0;
    }

    return 1;
}

int kvmppc_get_hasidle(CPUPPCState *env)
{
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
        return 1;
    }

    return 0;
}

int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t *)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls:
     *
     *     li r3, -1
     *     nop
     *     nop
     *     nop
     */

    hc[0] = 0x3860ffff;
    hc[1] = 0x60000000;
    hc[2] = 0x60000000;
    hc[3] = 0x60000000;

    return 0;
}

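/* Usage sketch for the function above (a hypothetical caller; board code
 * wires this up when building the guest device tree): the four
 * instructions are exposed to the guest, e.g. via an
 * "hcall-instructions" property, so a paravirtualized guest can copy and
 * execute them to make hypercalls. With the fallback sequence above,
 * every such call simply returns -1 in r3.
 *
 *     uint32_t hc[4];
 *     kvmppc_get_hypercall(env, (uint8_t *)hc, sizeof(hc));
 */
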
void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_PAPR;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret) {
        cpu_abort(env, "This KVM version does not support PAPR\n");
    }
}

void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_EPR;
    cap.args[0] = mpic_proxy;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret && mpic_proxy) {
        cpu_abort(env, "This KVM version does not support EPR\n");
    }
}

int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}

#ifdef TARGET_PPC64
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, name, size, rma);
    vmstate_register_ram_global(rma_region);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}

uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    if (cap_ppc_rma >= 2) {
        return current_size;
    }
    return MIN(current_size,
               getrampagesize() << (hash_shift - 7));
}
#endif

void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}

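/* The two functions above pair up; a sketch of the calling convention
 * (the liobn and window values here are illustrative only):
 *
 *     int fd;
 *     void *table = kvmppc_create_spapr_tce(0x80000000, 0x10000000, &fd);
 *     if (table) {
 *         ... DMA window is live; program TCE entries in the table ...
 *         kvmppc_remove_spapr_tce(table, fd, 0x10000000);
 *     }
 *
 * If the kernel lacks KVM_CAP_SPAPR_TCE, create returns NULL with
 * fd set to -1, and remove then bails out early returning -1.
 */
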
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl(). Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls. For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table
     * already. Kernels of this era have the GET_PVINFO capability
     * only on PR, so we use this hack to determine the right
     * answer */
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

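/* Note on the return convention above: a positive return value is the
 * hash table size expressed as a power-of-two shift, so "return 24"
 * means a 1 << 24 = 16 MB table, matching the fixed allocation of old
 * HV kernels; 0 means "caller allocates the htab"; negative is an
 * error from the ioctl.
 */
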
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

static void kvmppc_host_cpu_initfn(Object *obj)
{
    assert(kvm_enabled());
}

static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();

    /* Now fix up the class with information we can query from the host */

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }
}

int kvmppc_fixup_cpu(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int smt;

    /* Adjust cpu index for SMT */
    smt = kvmppc_smt_threads();
    cs->cpu_index = (cs->cpu_index / smp_threads) * smt
        + (cs->cpu_index % smp_threads);

    return 0;
}

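/* Worked example for the remapping above: on a POWER7-class host where
 * KVM_CAP_PPC_SMT reports 4 hardware threads per core, a guest running
 * with smp_threads=2 maps cpu_index 0,1,2,3 to KVM vcpu ids 0,1,4,5,
 * so each guest core starts on a host core boundary.
 */
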
static int kvm_ppc_register_host_cpu_type(void)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .instance_init = kvmppc_host_cpu_initfn,
        .class_init = kvmppc_host_cpu_class_init,
    };
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);
    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return 1;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}