/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <dirent.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
#include "cpu.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "mmu-hash64.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can ensure that we always reinject interrupts
 *     in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}

static int kvm_ppc_register_host_cpu_type(void);

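/*
 * Probe the optional KVM capabilities once at initialization; the cached
 * cap_* flags above gate the corresponding ioctl()s for the lifetime of
 * the VM.
 */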
int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}

static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so users may be misled into thinking they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    struct kvm_enable_cap encap = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    encap.cap = KVM_CAP_SW_TLB;
    encap.args[0] = (uintptr_t)&cfg;

    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}

#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 */
        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}

static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}

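/*
 * Returns the page size backing guest RAM: the normal host page size for
 * anonymous memory, or the hugetlbfs block size when -mem-path points at
 * a hugetlbfs mount.
 */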
static long getrampagesize(void)
{
    struct statfs fs;
    int ret;

    if (!mem_path) {
        /* guest RAM is backed by normal anonymous pages */
        return getpagesize();
    }

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugetlbfs, so return the huge page size */
    return fs.f_bsize;
}

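/*
 * Under KVM_PPC_PAGE_SIZES_REAL (HV KVM), a page size is only usable if
 * it does not exceed the page size backing guest RAM.
 */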
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}

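/*
 * Filter the kernel-reported segment page sizes down to what the RAM
 * backing can support: ik/jk walk the kernel tables, iq/jq fill the
 * QEMU-side tables, skipping entries rejected by kvm_valid_page_size().
 */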
static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
        env->mmu_model |= POWERPC_MMU_1TSEG;
    } else {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

#endif /* !defined (TARGET_PPC64) */

unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return cpu->cpu_index;
}

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        return ret;
    }

    idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);
        break;
    default:
        break;
    }

    return ret;
}

void kvm_arch_reset_vcpu(CPUState *cpu)
{
}

static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}

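/*
 * Accessors for individual SPRs via the ONE_REG interface.  The register
 * width is encoded in the id (KVM_REG_SIZE_*), so a union covers both
 * 32-bit and 64-bit SPRs.
 */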
static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        fprintf(stderr, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
                spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}

static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        fprintf(stderr, "Warning: Unable to set SPR %d to KVM: %s\n",
                spr, strerror(errno));
    }
}

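/*
 * FP/VSX transfer: on VSX-capable CPUs each VSR is two doublewords with
 * the FPR in the first one, so FPRs and VSRs go through the same
 * two-element buffer; which register set is addressed depends on the
 * PPC2_VSX flag.
 */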
static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            /* Beware. We have to swap the upper and lower 32-bit halves here */
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
                | env->DBAT[1][i];
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
                | env->IBAT[1][i];
        }

        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
        if (ret) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }
    }

    return ret;
}

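/*
 * Mirror of kvm_arch_put_registers(): pull the GPR file, SPRs, FP/VMX
 * state and (where the capabilities allow) BookE or BookS sregs back out
 * of the kernel.
 */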
int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    kvm_get_fp(cs);

    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        ppc_store_sdr1(env, sregs.u.s.sdr1);

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
                          sregs.u.s.ppc64.slb[i].slbv);
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_get_one_spr(cs, id, i);
            }
        }
    }

    return 0;
}

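/*
 * Only the external interrupt pin is forwarded to the kernel, and only
 * when the IRQ capabilities were detected at init time; every other
 * input pin remains fully emulated in QEMU.
 */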
int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}

#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1 << PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        dprintf("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
                       (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyway, so we will get a chance to deliver the rest. */
}

void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
}

int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}

static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        env->exception_index = EXCP_HLT;
    }

    return 0;
}

/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            dprintf("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            dprintf("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        dprintf("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        dprintf("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        dprintf("handle epr\n");
        run->epr.epr = ldl_phys(env->mpic_iack);
        ret = 0;
        break;
    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}

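/*
 * Scan /proc/cpuinfo for a line starting with @field and copy it into
 * @value; returns 0 on a match, -1 otherwise.
 */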
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}

uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}

/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}

/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit). Returns -1 if the property can't be
 * found or opened, and 0 if the format isn't understood. */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX];
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    strncat(buf, "/", sizeof(buf) - strlen(buf) - 1);
    strncat(buf, propname, sizeof(buf) - strlen(buf) - 1);

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}

uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}

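/*
 * Fetch the paravirt descriptor from the kernel; returns 0 on success,
 * 1 if the capability or the ioctl is unavailable.
 */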
static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
{
    PowerPCCPU *cpu = ppc_env_get_cpu(env);
    CPUState *cs = CPU(cpu);

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
        return 0;
    }

    return 1;
}

int kvmppc_get_hasidle(CPUPPCState *env)
{
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
        return 1;
    }

    return 0;
}

int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t *)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls:
     *
     *     li r3, -1
     *     nop
     *     nop
     *     nop
     */

    hc[0] = 0x3860ffff;
    hc[1] = 0x60000000;
    hc[2] = 0x60000000;
    hc[3] = 0x60000000;

    return 0;
}

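/*
 * Switch the vcpu into PAPR mode for sPAPR guests; without kernel
 * support for KVM_CAP_PPC_PAPR this configuration cannot work, hence
 * the abort.
 */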
void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_PAPR;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret) {
        cpu_abort(env, "This KVM version does not support PAPR\n");
    }
}

void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_EPR;
    cap.args[0] = mpic_proxy;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret && mpic_proxy) {
        cpu_abort(env, "This KVM version does not support EPR\n");
    }
}

int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}

#ifdef TARGET_PPC64
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, name, size, rma);
    vmstate_register_ram_global(rma_region);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}

uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    if (cap_ppc_rma >= 2) {
        return current_size;
    }
    return MIN(current_size,
               getrampagesize() << (hash_shift - 7));
}
#endif

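/*
 * Create an in-kernel TCE (DMA translation) table and mmap() it into
 * QEMU; returns NULL (and *pfd = -1) when in-kernel TCE tables are
 * unsupported, leaving the caller to fall back to a userspace table.
 */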
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}

int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl(). Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls. For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table
     * already. Kernels of this era have the GET_PVINFO capability
     * only on PR, so we use this hack to determine the right
     * answer */
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

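/* Read the host's Processor Version Register directly. */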
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

static void kvmppc_host_cpu_initfn(Object *obj)
{
    assert(kvm_enabled());
}

static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();

    /* Now fix up the class with information we can query from the host */

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }
}

int kvmppc_fixup_cpu(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int smt;

    /* Adjust cpu index for SMT */
    smt = kvmppc_smt_threads();
    cs->cpu_index = (cs->cpu_index / smp_threads) * smt
        + (cs->cpu_index % smp_threads);

    return 0;
}

bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

static int kvm_ppc_register_host_cpu_type(void)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .instance_init = kvmppc_host_cpu_initfn,
        .class_init = kvmppc_host_cpu_class_init,
    };
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);
    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return 1;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}