/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <dirent.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
#include "cpu.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "hw/sysbus.h"
#include "hw/spapr.h"
#include "hw/spapr_vio.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_hior;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}

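/*
 * Illustrative note (not part of the upstream file): the rearm happens in
 * kvm_arch_pre_run() below, roughly 20 ms after each injection:
 *
 *     qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
 *                    (get_ticks_per_sec() / 50));
 *
 * get_ticks_per_sec() is 10^9, so one fiftieth of it is 20 ms expressed
 * in nanoseconds.
 */
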
int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    return 0;
}

static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users into thinking they
           can run BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    struct kvm_enable_cap encap = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    encap.cap = KVM_CAP_SW_TLB;
    encap.args[0] = (uintptr_t)&cfg;

    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}

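/*
 * Illustrative note (an assumption about callers, not upstream text): once
 * env->kvm_sw_tlb is set, QEMU and the kernel share env->tlb.tlbm directly,
 * so TLB writers only need to mark the array dirty:
 *
 *     env->tlb.tlbm[idx] = new_entry;  // 'idx'/'new_entry' are hypothetical
 *     env->tlb_dirty = true;           // picked up by kvm_sw_tlb_put()
 *
 * kvm_arch_put_registers() then pushes the whole array to the kernel via
 * KVM_DIRTY_TLB before the vcpu runs again.
 */
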
#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 */
        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}

static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}

static long getrampagesize(void)
{
    struct statfs fs;
    int ret;

    if (!mem_path) {
        /* guest RAM is backed by normal anonymous pages */
        return getpagesize();
    }

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}

static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}

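/*
 * Worked example (illustrative): with guest RAM backed by 16 MiB hugepages,
 * rampgsize == 1 << 24 and page shifts 12, 16 and 24 all pass this check;
 * with ordinary 4 KiB anonymous pages (rampgsize == 1 << 12), only shift 12
 * survives once KVM_PPC_PAGE_SIZES_REAL is set.
 */
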
static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
        env->mmu_model |= POWERPC_MMU_1TSEG;
    } else {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

#endif /* !defined (TARGET_PPC64) */

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        return ret;
    }

    idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);
        break;
    default:
        break;
    }

    return ret;
}

void kvm_arch_reset_vcpu(CPUState *cpu)
{
}

static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}

int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = env->xer;
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            /* Beware. We have to swap upper and lower bits here */
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
                | env->DBAT[1][i];
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
                | env->IBAT[1][i];
        }

        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
        if (ret) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        uint64_t hior = env->spr[SPR_HIOR];
        struct kvm_one_reg reg = {
            .id = KVM_REG_PPC_HIOR,
            .addr = (uintptr_t) &hior,
        };

        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    return ret;
}

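/*
 * Illustrative sketch (assumption, not upstream code): the HIOR transfer
 * above is the generic ONE_REG pattern; reading a register back uses the
 * same structure with the complementary ioctl:
 *
 *     uint64_t val;
 *     struct kvm_one_reg reg = {
 *         .id = KVM_REG_PPC_HIOR,
 *         .addr = (uintptr_t)&val,
 *     };
 *     kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 */
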
int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    env->xer = regs.xer;
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        ppc_store_sdr1(env, sregs.u.s.sdr1);

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
                          sregs.u.s.ppc64.slb[i].slbv);
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    return 0;
}

int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}

#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1 << PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        dprintf("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
                       (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */
}

void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
}

int kvm_arch_process_async_events(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    return cpu->env.halted;
}

static int kvmppc_handle_halt(CPUPPCState *env)
{
    if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        env->halted = 1;
        env->exception_index = EXCP_HLT;
    }

    return 0;
}

/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            dprintf("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            dprintf("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        dprintf("handle halt\n");
        ret = kvmppc_handle_halt(env);
        break;
#ifdef CONFIG_PSERIES
    case KVM_EXIT_PAPR_HCALL:
        dprintf("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        dprintf("handle epr\n");
        run->epr.epr = ldl_phys(env->mpic_iack);
        ret = 0;
        break;
    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}

static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}

uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}

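/*
 * Illustrative note (assumption about typical hosts): /proc/cpuinfo on a
 * POWER machine contains a line such as
 *
 *     timebase        : 512000000
 *
 * so the code above skips past the ':' and atoi() yields 512000000,
 * i.e. a 512 MHz timebase.
 */
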
/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}

/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit). Returns 0 or -1 if anything goes wrong
 * (can't find or open the property, or doesn't understand the
 * format) */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX];
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    strncat(buf, "/", sizeof(buf) - strlen(buf));
    strncat(buf, propname, sizeof(buf) - strlen(buf));

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}

uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}

static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
{
    PowerPCCPU *cpu = ppc_env_get_cpu(env);
    CPUState *cs = CPU(cpu);

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
        return 0;
    }

    return 1;
}

int kvmppc_get_hasidle(CPUPPCState *env)
{
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
        return 1;
    }

    return 0;
}

int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t *)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls:
     *
     *     li r3, -1
     *     nop
     *     nop
     *     nop
     */

    hc[0] = 0x3860ffff;    /* li r3, -1 */
    hc[1] = 0x60000000;    /* nop */
    hc[2] = 0x60000000;    /* nop */
    hc[3] = 0x60000000;    /* nop */

    return 0;
}

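/*
 * Illustrative usage sketch (assumption, not upstream code): a machine
 * model would typically fetch these instructions while building the guest,
 * e.g.:
 *
 *     uint8_t hypercall[16];
 *     kvmppc_get_hypercall(env, hypercall, sizeof(hypercall));
 *     // expose 'hypercall' to the guest, e.g. via a device tree property
 */
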
void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_PAPR;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret) {
        cpu_abort(env, "This KVM version does not support PAPR\n");
    }
}

void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_EPR;
    cap.args[0] = mpic_proxy;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret && mpic_proxy) {
        cpu_abort(env, "This KVM version does not support EPR\n");
    }
}

int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}

#ifdef TARGET_PPC64
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, name, size, rma);
    vmstate_register_ram_global(rma_region);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}

uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    if (cap_ppc_rma >= 2) {
        return current_size;
    }
    return MIN(current_size,
               getrampagesize() << (hash_shift - 7));
}
#endif

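/*
 * Worked example (illustrative): with 4 KiB host pages (getrampagesize()
 * == 1 << 12) and hash_shift == 24, the clamp above evaluates to
 * (1 << 12) << (24 - 7) == 512 MiB, so any larger RMA request is trimmed
 * when contiguous allocation isn't required.
 */
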
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}

int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl(). Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls. For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table
     * already. Kernels of this era have the GET_PVINFO capability
     * only on PR, so we use this hack to determine the right
     * answer */
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

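/*
 * Note (illustrative): the return value is log2 of the hash table size,
 * so the HV fallback of 24 above corresponds to the fixed
 * 2^24 = 16 MiB hash table that those older kernels allocate.
 */
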
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

static void kvmppc_host_cpu_initfn(Object *obj)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(obj);

    assert(kvm_enabled());

    if (pcc->info->pvr != mfpvr()) {
        fprintf(stderr, "Your host CPU is unsupported.\n"
                "Please choose a supported model instead, see -cpu ?.\n");
        exit(1);
    }
}

static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;
    ppc_def_t *spec;
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();

    spec = g_malloc0(sizeof(*spec));

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc != NULL) {
        memcpy(spec, pvr_pcc->info, sizeof(*spec));
    }
    pcc->info = spec;
    /* Override the display name for -cpu ? and QMP */
    pcc->info->name = "host";

    /* Now fix up the spec with information we can query from the host */

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
    }
}

1299
55e5c285 1300int kvmppc_fixup_cpu(PowerPCCPU *cpu)
12b1143b 1301{
55e5c285 1302 CPUState *cs = CPU(cpu);
12b1143b
DG
1303 int smt;
1304
1305 /* Adjust cpu index for SMT */
1306 smt = kvmppc_smt_threads();
55e5c285
AF
1307 cs->cpu_index = (cs->cpu_index / smp_threads) * smt
1308 + (cs->cpu_index % smp_threads);
12b1143b
DG
1309
1310 return 0;
1311}
1312
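/*
 * Worked example (illustrative): with smp_threads == 2 configured for the
 * guest and kvmppc_smt_threads() == 4 on the host, guest CPU index 3 maps
 * to (3 / 2) * 4 + (3 % 2) == 5, spreading guest cores onto host core
 * boundaries.
 */
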
bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return 1;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}

static const TypeInfo kvm_host_cpu_type_info = {
    .name = TYPE_HOST_POWERPC_CPU,
    .parent = TYPE_POWERPC_CPU,
    .instance_init = kvmppc_host_cpu_initfn,
    .class_init = kvmppc_host_cpu_class_init,
};

static void kvm_ppc_register_types(void)
{
    type_register_static(&kvm_host_cpu_type_info);
}

type_init(kvm_ppc_register_types)