/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <dirent.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
#include "cpu.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/ppc/ppc.h"
#include "sysemu/watchdog.h"
#include "trace.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can ensure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}

static int kvm_ppc_register_host_cpu_type(void);

int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}

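/* Push the guest's PVR into KVM via the SREGS interface so the kernel
 * models the right CPU flavour; on BookE we keep the host's native PVR. */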
static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users into thinking they
           can run BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}

#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 */
        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}

static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}

static long getrampagesize(void)
{
    struct statfs fs;
    int ret;

    if (!mem_path) {
        /* guest RAM is backed by normal anonymous pages */
        return getpagesize();
    }

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC 0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}

static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}

static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    /*
     * XXX This loop should be an entry wide AND of the capabilities that
     *     the selected CPU has with the capabilities that KVM supports.
     */
    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

#endif /* !defined (TARGET_PPC64) */

unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
}

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        return ret;
    }

    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);
        break;
    default:
        break;
    }

    return ret;
}

static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}

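/* Read one SPR from KVM through the ONE_REG interface. The register
 * width is encoded in the id itself (KVM_REG_SIZE_MASK), so the same
 * helper serves both 32-bit and 64-bit SPRs. */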
static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_get(spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}

static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}

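/* Push the FP/VSX state to KVM. When VSX is available, each register is
 * transferred as a 128-bit VSR whose first doubleword holds the FPR. */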
static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

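/* PAPR guests register three structures with the hypervisor: the VPA
 * proper, the SLB shadow buffer and the dispatch trace log (DTL). The
 * helpers below transfer that registration state through KVM so it
 * survives migration. */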
#if defined(TARGET_PPC64)
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}

static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered. That means when restoring state, if a VPA *is*
     * registered, we need to set that up first. If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */

int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

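    /* QEMU keeps the condition register as eight 4-bit fields in
     * env->crf[]; KVM expects a single 32-bit value with CR0 in the
     * most significant nibble, so repack it here. */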
    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            if (env->slb[i].esid & SLB_ESID_V) {
                sregs.u.s.ppc64.slb[i].slbe |= i;
            }
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            /* Beware. We have to swap upper and lower bits here */
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
                | env->DBAT[1][i];
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
                | env->IBAT[1][i];
        }

        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
        if (ret) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here: for kernels which have
         * the ONE_REG calls but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (cap_papr) {
            if (kvm_put_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to set VPA information to KVM\n");
            }
        }

        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif /* TARGET_PPC64 */
    }

    return ret;
}

int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    kvm_get_fp(cs);

    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (!env->external_htab) {
            ppc_store_sdr1(env, sregs.u.s.sdr1);
        }

        /* Sync SLB */
#ifdef TARGET_PPC64
        /*
         * The packed SLB array we get from KVM_GET_SREGS only contains
         * information about valid entries. So we flush our internal
         * copy to get rid of stale ones, then put all valid SLB entries
         * back in.
         */
        memset(env->slb, 0, sizeof(env->slb));
        for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
            target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
            target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
            /*
             * Only restore valid entries
             */
            if (rb & SLB_ESID_V) {
                ppc_store_slb(env, rb, rs);
            }
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here: for kernels which have
         * the ONE_REG calls but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_get_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (cap_papr) {
            if (kvm_get_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to get VPA information from KVM\n");
            }
        }

        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif
    }

    return 0;
}

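/* Only the external interrupt pin is forwarded to KVM; all other
 * interrupt causes are left to QEMU's own interrupt handling. */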
int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}

#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1 << PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        DPRINTF("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                  (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyway, so we will get a chance to deliver the rest. */
}

void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
}

int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}

static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        cs->exception_index = EXCP_HLT;
    }

    return 0;
}

/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}

int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (!kvm_enabled()) {
        return -1;
    }

    if (!cap_ppc_watchdog) {
        printf("warning: KVM does not support watchdog\n");
        return -1;
    }

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    return ret;
}

static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}

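/* Parse the host timebase frequency from the "timebase" line of
 * /proc/cpuinfo, falling back to the tick rate if it isn't there. */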
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}

/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}

/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
 * (can't find or open the property, or doesn't understand the
 * format) */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX];
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    strncat(buf, "/", sizeof(buf) - strlen(buf) - 1);
    strncat(buf, propname, sizeof(buf) - strlen(buf) - 1);

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}

uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}

static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
{
    PowerPCCPU *cpu = ppc_env_get_cpu(env);
    CPUState *cs = CPU(cpu);

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
        return 0;
    }

    return 1;
}

int kvmppc_get_hasidle(CPUPPCState *env)
{
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
        return 1;
    }

    return 0;
}

int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t *)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls:
     *
     *     li r3, -1
     *     nop
     *     nop
     *     nop
     */

    hc[0] = 0x3860ffff;
    hc[1] = 0x60000000;
    hc[2] = 0x60000000;
    hc[3] = 0x60000000;

    return 0;
}

void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
    if (ret) {
        cpu_abort(cs, "This KVM version does not support PAPR\n");
    }

    /* Update the capability flag so we sync the right information
     * with kvm */
    cap_papr = 1;
}

int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
{
    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
}

void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUState *cs = CPU(cpu);
    int ret;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
    if (ret && mpic_proxy) {
        cpu_abort(cs, "This KVM version does not support EPR\n");
    }
}

int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}

#ifdef TARGET_PPC64
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
    vmstate_register_ram_global(rma_region);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}

uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    if (cap_ppc_rma >= 2) {
        return current_size;
    }

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
    rampagesize = getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

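    /* A PTEG is 128 bytes, so a 2^hash_shift byte hash table holds
     * 2^(hash_shift - 7) PTEGs; bounding the RMA at roughly one page
     * per PTEG keeps the real mode area mappable by this table. */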
    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
#endif

void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}

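/* Returns 0 when the caller (qemu) should allocate the hash table
 * itself, a positive shift when the kernel owns a 2^shift byte table,
 * or a negative errno on failure. */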
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table
     * already.  Kernels of this era have the GET_PVINFO capability
     * only on PR, so we use this hack to determine the right
     * answer */
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

static void kvmppc_host_cpu_initfn(Object *obj)
{
    assert(kvm_enabled());
}

static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }
}

bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

bool kvmppc_has_cap_htab_fd(void)
{
    return cap_htab_fd;
}

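/* Walk up the QOM class hierarchy until the first abstract class,
 * which is the generic family class for this CPU model. */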
static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
{
    ObjectClass *oc = OBJECT_CLASS(pcc);

    while (oc && !object_class_is_abstract(oc)) {
        oc = object_class_get_parent(oc);
    }
    assert(oc);

    return POWERPC_CPU_CLASS(oc);
}

static int kvm_ppc_register_host_cpu_type(void)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .instance_init = kvmppc_host_cpu_initfn,
        .class_init = kvmppc_host_cpu_class_init,
    };
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;
    DeviceClass *dc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);

    /* Register generic family CPU class for a family */
    pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
    dc = DEVICE_CLASS(pvr_pcc);
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
    type_register(&type_info);

    return 0;
}

int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
{
    struct kvm_rtas_token_args args = {
        .token = token,
    };

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
        return -ENOENT;
    }

    strncpy(args.name, function, sizeof(args.name));

    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}

int kvmppc_get_htab_fd(bool write)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = 0,
    };

    if (!cap_htab_fd) {
        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
        return -1;
    }

    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
}

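/* Stream the hash table from the kernel fd into the migration stream,
 * stopping at EOF, on error, or once max_ns has elapsed (max_ns < 0
 * means no deadline). Returns 1 when the whole table has been read. */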
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            /* Kernel already returns data in BE format for the file */
            qemu_put_buffer(f, buf, rc);
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}

int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    /* This is KVM on ppc, so this is all big-endian */
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return 1;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}

int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
{
    return -EINVAL;
}

int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
{
    return -EINVAL;
}

int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
}

void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
{
}

struct kvm_get_htab_buf {
    struct kvm_get_htab_header header;
    /*
     * We require one extra byte for read
     */
    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
};

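/* Read a full hash PTE group through the HTAB fd and hand back the
 * buffer's hpte array as an opaque token; the caller releases it with
 * kvmppc_hash64_free_pteg(). */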
uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
{
    int htab_fd;
    struct kvm_get_htab_fd ghf;
    struct kvm_get_htab_buf *hpte_buf;

    ghf.flags = 0;
    ghf.start_index = pte_index;
    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (htab_fd < 0) {
        goto error_out;
    }

    hpte_buf = g_malloc0(sizeof(*hpte_buf));
    /*
     * Read the hpte group
     */
    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
        goto out_close;
    }

    close(htab_fd);
    return (uint64_t)(uintptr_t) hpte_buf->hpte;

out_close:
    g_free(hpte_buf);
    close(htab_fd);
error_out:
    return 0;
}

void kvmppc_hash64_free_pteg(uint64_t token)
{
    struct kvm_get_htab_buf *htab_buf;

    htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
                            hpte);
    g_free(htab_buf);
    return;
}

void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
                             target_ulong pte0, target_ulong pte1)
{
    int htab_fd;
    struct kvm_get_htab_fd ghf;
    struct kvm_get_htab_buf hpte_buf;

    ghf.flags = 0;
    ghf.start_index = 0;     /* Ignored */
    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (htab_fd < 0) {
        goto error_out;
    }

    hpte_buf.header.n_valid = 1;
    hpte_buf.header.n_invalid = 0;
    hpte_buf.header.index = pte_index;
    hpte_buf.hpte[0] = pte0;
    hpte_buf.hpte[1] = pte1;
    /*
     * Write the hpte entry.
     * CAUTION: write() has the warn_unused_result attribute. Hence we
     * need to check the return value, even though we do nothing.
     */
    if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
        goto out_close;
    }

out_close:
    close(htab_fd);
    return;

error_out:
    return;
}