]> git.proxmox.com Git - mirror_qemu.git/blame - target-ppc/kvm.c
linux-headers: update linux headers to kvm/next
[mirror_qemu.git] / target-ppc / kvm.c
CommitLineData
d76d1650
AJ
1/*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
90dc8812 5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
d76d1650
AJ
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
eadaada1 17#include <dirent.h>
d76d1650
AJ
18#include <sys/types.h>
19#include <sys/ioctl.h>
20#include <sys/mman.h>
4656e1f0 21#include <sys/vfs.h>
d76d1650
AJ
22
23#include <linux/kvm.h>
24
25#include "qemu-common.h"
1de7afc9 26#include "qemu/timer.h"
9c17d615
PB
27#include "sysemu/sysemu.h"
28#include "sysemu/kvm.h"
d76d1650
AJ
29#include "kvm_ppc.h"
30#include "cpu.h"
9c17d615
PB
31#include "sysemu/cpus.h"
32#include "sysemu/device_tree.h"
d5aea6f3 33#include "mmu-hash64.h"
d76d1650 34
f61b4bed 35#include "hw/sysbus.h"
0d09e41a
PB
36#include "hw/ppc/spapr.h"
37#include "hw/ppc/spapr_vio.h"
98a8b524 38#include "hw/ppc/ppc.h"
31f2cb8f 39#include "sysemu/watchdog.h"
b36f100e 40#include "trace.h"
f61b4bed 41
d76d1650
AJ
42//#define DEBUG_KVM
43
44#ifdef DEBUG_KVM
da56ff91 45#define DPRINTF(fmt, ...) \
d76d1650
AJ
46 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
47#else
da56ff91 48#define DPRINTF(fmt, ...) \
d76d1650
AJ
49 do { } while (0)
50#endif
51
eadaada1
AG
52#define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
53
94a8d39a
JK
54const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
55 KVM_CAP_LAST_INFO
56};
57
fc87e185
AG
/*
 * KVM capability flags.  All of them except cap_papr are filled in by
 * kvm_arch_init(); objects with static storage duration start out as zero,
 * so no explicit initializer is required.
 */
static int cap_interrupt_unset;
static int cap_interrupt_level;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
fc87e185 72
c821c2bd
AG
73/* XXX We have a race condition where we actually have a level triggered
74 * interrupt, but the infrastructure can't expose that yet, so the guest
75 * takes but ignores it, goes to sleep and never gets notified that there's
76 * still an interrupt pending.
c6a94ba5 77 *
c821c2bd
AG
78 * As a quick workaround, let's just wake up again 20 ms after we injected
79 * an interrupt. That way we can assure that we're always reinjecting
80 * interrupts in case the guest swallowed them.
c6a94ba5
AG
81 */
82static QEMUTimer *idle_timer;
83
d5a68146 84static void kvm_kick_cpu(void *opaque)
c6a94ba5 85{
d5a68146 86 PowerPCCPU *cpu = opaque;
d5a68146 87
c08d7424 88 qemu_cpu_kick(CPU(cpu));
c6a94ba5
AG
89}
90
5ba4576b
AF
91static int kvm_ppc_register_host_cpu_type(void);
92
cad1e282 93int kvm_arch_init(KVMState *s)
d76d1650 94{
fc87e185 95 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
fc87e185 96 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
90dc8812 97 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
90dc8812 98 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
e97c3636 99 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
354ac20a 100 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
0f5cb298 101 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
da95324e 102 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
d67d40ea 103 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
f1af19d7 104 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
3b961124 105 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
31f2cb8f 106 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
9b00ea49
DG
107 /* Note: we don't set cap_papr here, because this capability is
108 * only activated after this by kvmppc_set_papr() */
e68cb8b4 109 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
fc87e185
AG
110
111 if (!cap_interrupt_level) {
112 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
113 "VM to stall at times!\n");
114 }
115
5ba4576b
AF
116 kvm_ppc_register_host_cpu_type();
117
d76d1650
AJ
118 return 0;
119}
120
1bc22652 121static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
d76d1650 122{
1bc22652
AF
123 CPUPPCState *cenv = &cpu->env;
124 CPUState *cs = CPU(cpu);
861bbc80 125 struct kvm_sregs sregs;
5666ca4a
SW
126 int ret;
127
128 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
64e07be5
AG
129 /* What we're really trying to say is "if we're on BookE, we use
130 the native PVR for now". This is the only sane way to check
131 it though, so we potentially confuse users that they can run
132 BookE guests on BookS. Let's hope nobody dares enough :) */
5666ca4a
SW
133 return 0;
134 } else {
90dc8812 135 if (!cap_segstate) {
64e07be5
AG
136 fprintf(stderr, "kvm error: missing PVR setting capability\n");
137 return -ENOSYS;
5666ca4a 138 }
5666ca4a
SW
139 }
140
1bc22652 141 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
5666ca4a
SW
142 if (ret) {
143 return ret;
144 }
861bbc80
AG
145
146 sregs.pvr = cenv->spr[SPR_PVR];
1bc22652 147 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
5666ca4a
SW
148}
149
93dd5e85 150/* Set up a shared TLB array with KVM */
1bc22652 151static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
93dd5e85 152{
1bc22652
AF
153 CPUPPCState *env = &cpu->env;
154 CPUState *cs = CPU(cpu);
93dd5e85
SW
155 struct kvm_book3e_206_tlb_params params = {};
156 struct kvm_config_tlb cfg = {};
93dd5e85
SW
157 unsigned int entries = 0;
158 int ret, i;
159
160 if (!kvm_enabled() ||
a60f24b5 161 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
93dd5e85
SW
162 return 0;
163 }
164
165 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
166
167 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
168 params.tlb_sizes[i] = booke206_tlb_size(env, i);
169 params.tlb_ways[i] = booke206_tlb_ways(env, i);
170 entries += params.tlb_sizes[i];
171 }
172
173 assert(entries == env->nb_tlb);
174 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
175
176 env->tlb_dirty = true;
177
178 cfg.array = (uintptr_t)env->tlb.tlbm;
179 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
180 cfg.params = (uintptr_t)&params;
181 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
182
48add816 183 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
93dd5e85
SW
184 if (ret < 0) {
185 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
186 __func__, strerror(-ret));
187 return ret;
188 }
189
190 env->kvm_sw_tlb = true;
191 return 0;
192}
193
4656e1f0
BH
194
195#if defined(TARGET_PPC64)
a60f24b5 196static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
4656e1f0
BH
197 struct kvm_ppc_smmu_info *info)
198{
a60f24b5
AF
199 CPUPPCState *env = &cpu->env;
200 CPUState *cs = CPU(cpu);
201
4656e1f0
BH
202 memset(info, 0, sizeof(*info));
203
204 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
205 * need to "guess" what the supported page sizes are.
206 *
207 * For that to work we make a few assumptions:
208 *
209 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
210 * KVM which only supports 4K and 16M pages, but supports them
211 * regardless of the backing store characteritics. We also don't
212 * support 1T segments.
213 *
214 * This is safe as if HV KVM ever supports that capability or PR
215 * KVM grows supports for more page/segment sizes, those versions
216 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
217 * will not hit this fallback
218 *
219 * - Else we are running HV KVM. This means we only support page
220 * sizes that fit in the backing store. Additionally we only
221 * advertize 64K pages if the processor is ARCH 2.06 and we assume
222 * P7 encodings for the SLB and hash table. Here too, we assume
223 * support for any newer processor will mean a kernel that
224 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
225 * this fallback.
226 */
a60f24b5 227 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
4656e1f0
BH
228 /* No flags */
229 info->flags = 0;
230 info->slb_size = 64;
231
232 /* Standard 4k base page size segment */
233 info->sps[0].page_shift = 12;
234 info->sps[0].slb_enc = 0;
235 info->sps[0].enc[0].page_shift = 12;
236 info->sps[0].enc[0].pte_enc = 0;
237
238 /* Standard 16M large page size segment */
239 info->sps[1].page_shift = 24;
240 info->sps[1].slb_enc = SLB_VSID_L;
241 info->sps[1].enc[0].page_shift = 24;
242 info->sps[1].enc[0].pte_enc = 0;
243 } else {
244 int i = 0;
245
246 /* HV KVM has backing store size restrictions */
247 info->flags = KVM_PPC_PAGE_SIZES_REAL;
248
249 if (env->mmu_model & POWERPC_MMU_1TSEG) {
250 info->flags |= KVM_PPC_1T_SEGMENTS;
251 }
252
253 if (env->mmu_model == POWERPC_MMU_2_06) {
254 info->slb_size = 32;
255 } else {
256 info->slb_size = 64;
257 }
258
259 /* Standard 4k base page size segment */
260 info->sps[i].page_shift = 12;
261 info->sps[i].slb_enc = 0;
262 info->sps[i].enc[0].page_shift = 12;
263 info->sps[i].enc[0].pte_enc = 0;
264 i++;
265
266 /* 64K on MMU 2.06 */
267 if (env->mmu_model == POWERPC_MMU_2_06) {
268 info->sps[i].page_shift = 16;
269 info->sps[i].slb_enc = 0x110;
270 info->sps[i].enc[0].page_shift = 16;
271 info->sps[i].enc[0].pte_enc = 1;
272 i++;
273 }
274
275 /* Standard 16M large page size segment */
276 info->sps[i].page_shift = 24;
277 info->sps[i].slb_enc = SLB_VSID_L;
278 info->sps[i].enc[0].page_shift = 24;
279 info->sps[i].enc[0].pte_enc = 0;
280 }
281}
282
a60f24b5 283static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
4656e1f0 284{
a60f24b5 285 CPUState *cs = CPU(cpu);
4656e1f0
BH
286 int ret;
287
a60f24b5
AF
288 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
289 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
4656e1f0
BH
290 if (ret == 0) {
291 return;
292 }
293 }
294
a60f24b5 295 kvm_get_fallback_smmu_info(cpu, info);
4656e1f0
BH
296}
297
298static long getrampagesize(void)
299{
300 struct statfs fs;
301 int ret;
302
303 if (!mem_path) {
304 /* guest RAM is backed by normal anonymous pages */
305 return getpagesize();
306 }
307
308 do {
309 ret = statfs(mem_path, &fs);
310 } while (ret != 0 && errno == EINTR);
311
312 if (ret != 0) {
313 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
314 strerror(errno));
315 exit(1);
316 }
317
318#define HUGETLBFS_MAGIC 0x958458f6
319
320 if (fs.f_type != HUGETLBFS_MAGIC) {
321 /* Explicit mempath, but it's ordinary pages */
322 return getpagesize();
323 }
324
325 /* It's hugepage, return the huge page size */
326 return fs.f_bsize;
327}
328
329static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
330{
331 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
332 return true;
333 }
334
335 return (1ul << shift) <= rampgsize;
336}
337
a60f24b5 338static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
4656e1f0
BH
339{
340 static struct kvm_ppc_smmu_info smmu_info;
341 static bool has_smmu_info;
a60f24b5 342 CPUPPCState *env = &cpu->env;
4656e1f0
BH
343 long rampagesize;
344 int iq, ik, jq, jk;
345
346 /* We only handle page sizes for 64-bit server guests for now */
347 if (!(env->mmu_model & POWERPC_MMU_64)) {
348 return;
349 }
350
351 /* Collect MMU info from kernel if not already */
352 if (!has_smmu_info) {
a60f24b5 353 kvm_get_smmu_info(cpu, &smmu_info);
4656e1f0
BH
354 has_smmu_info = true;
355 }
356
357 rampagesize = getrampagesize();
358
359 /* Convert to QEMU form */
360 memset(&env->sps, 0, sizeof(env->sps));
361
08215d8f
AG
362 /*
363 * XXX This loop should be an entry wide AND of the capabilities that
364 * the selected CPU has with the capabilities that KVM supports.
365 */
4656e1f0
BH
366 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
367 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
368 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
369
370 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
371 ksps->page_shift)) {
372 continue;
373 }
374 qsps->page_shift = ksps->page_shift;
375 qsps->slb_enc = ksps->slb_enc;
376 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
377 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
378 ksps->enc[jk].page_shift)) {
379 continue;
380 }
381 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
382 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
383 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
384 break;
385 }
386 }
387 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
388 break;
389 }
390 }
391 env->slb_nr = smmu_info.slb_size;
08215d8f 392 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
4656e1f0
BH
393 env->mmu_model &= ~POWERPC_MMU_1TSEG;
394 }
395}
396#else /* defined (TARGET_PPC64) */
397
a60f24b5 398static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
4656e1f0
BH
399{
400}
401
402#endif /* !defined (TARGET_PPC64) */
403
b164e48e
EH
404unsigned long kvm_arch_vcpu_id(CPUState *cpu)
405{
0f20ba62 406 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
b164e48e
EH
407}
408
20d695a9 409int kvm_arch_init_vcpu(CPUState *cs)
5666ca4a 410{
20d695a9
AF
411 PowerPCCPU *cpu = POWERPC_CPU(cs);
412 CPUPPCState *cenv = &cpu->env;
5666ca4a
SW
413 int ret;
414
4656e1f0 415 /* Gather server mmu info from KVM and update the CPU state */
a60f24b5 416 kvm_fixup_page_sizes(cpu);
4656e1f0
BH
417
418 /* Synchronize sregs with kvm */
1bc22652 419 ret = kvm_arch_sync_sregs(cpu);
5666ca4a
SW
420 if (ret) {
421 return ret;
422 }
861bbc80 423
bc72ad67 424 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
c821c2bd 425
93dd5e85
SW
426 /* Some targets support access to KVM's guest TLB. */
427 switch (cenv->mmu_model) {
428 case POWERPC_MMU_BOOKE206:
1bc22652 429 ret = kvm_booke206_tlb_init(cpu);
93dd5e85
SW
430 break;
431 default:
432 break;
433 }
434
861bbc80 435 return ret;
d76d1650
AJ
436}
437
1bc22652 438static void kvm_sw_tlb_put(PowerPCCPU *cpu)
93dd5e85 439{
1bc22652
AF
440 CPUPPCState *env = &cpu->env;
441 CPUState *cs = CPU(cpu);
93dd5e85
SW
442 struct kvm_dirty_tlb dirty_tlb;
443 unsigned char *bitmap;
444 int ret;
445
446 if (!env->kvm_sw_tlb) {
447 return;
448 }
449
450 bitmap = g_malloc((env->nb_tlb + 7) / 8);
451 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
452
453 dirty_tlb.bitmap = (uintptr_t)bitmap;
454 dirty_tlb.num_dirty = env->nb_tlb;
455
1bc22652 456 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
93dd5e85
SW
457 if (ret) {
458 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
459 __func__, strerror(-ret));
460 }
461
462 g_free(bitmap);
463}
464
d67d40ea
DG
465static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
466{
467 PowerPCCPU *cpu = POWERPC_CPU(cs);
468 CPUPPCState *env = &cpu->env;
469 union {
470 uint32_t u32;
471 uint64_t u64;
472 } val;
473 struct kvm_one_reg reg = {
474 .id = id,
475 .addr = (uintptr_t) &val,
476 };
477 int ret;
478
479 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
480 if (ret != 0) {
b36f100e 481 trace_kvm_failed_spr_get(spr, strerror(errno));
d67d40ea
DG
482 } else {
483 switch (id & KVM_REG_SIZE_MASK) {
484 case KVM_REG_SIZE_U32:
485 env->spr[spr] = val.u32;
486 break;
487
488 case KVM_REG_SIZE_U64:
489 env->spr[spr] = val.u64;
490 break;
491
492 default:
493 /* Don't handle this size yet */
494 abort();
495 }
496 }
497}
498
499static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
500{
501 PowerPCCPU *cpu = POWERPC_CPU(cs);
502 CPUPPCState *env = &cpu->env;
503 union {
504 uint32_t u32;
505 uint64_t u64;
506 } val;
507 struct kvm_one_reg reg = {
508 .id = id,
509 .addr = (uintptr_t) &val,
510 };
511 int ret;
512
513 switch (id & KVM_REG_SIZE_MASK) {
514 case KVM_REG_SIZE_U32:
515 val.u32 = env->spr[spr];
516 break;
517
518 case KVM_REG_SIZE_U64:
519 val.u64 = env->spr[spr];
520 break;
521
522 default:
523 /* Don't handle this size yet */
524 abort();
525 }
526
527 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
528 if (ret != 0) {
b36f100e 529 trace_kvm_failed_spr_set(spr, strerror(errno));
d67d40ea
DG
530 }
531}
532
70b79849
DG
533static int kvm_put_fp(CPUState *cs)
534{
535 PowerPCCPU *cpu = POWERPC_CPU(cs);
536 CPUPPCState *env = &cpu->env;
537 struct kvm_one_reg reg;
538 int i;
539 int ret;
540
541 if (env->insns_flags & PPC_FLOAT) {
542 uint64_t fpscr = env->fpscr;
543 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
544
545 reg.id = KVM_REG_PPC_FPSCR;
546 reg.addr = (uintptr_t)&fpscr;
547 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
548 if (ret < 0) {
da56ff91 549 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
70b79849
DG
550 return ret;
551 }
552
553 for (i = 0; i < 32; i++) {
554 uint64_t vsr[2];
555
556 vsr[0] = float64_val(env->fpr[i]);
557 vsr[1] = env->vsr[i];
558 reg.addr = (uintptr_t) &vsr;
559 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
560
561 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
562 if (ret < 0) {
da56ff91 563 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
70b79849
DG
564 i, strerror(errno));
565 return ret;
566 }
567 }
568 }
569
570 if (env->insns_flags & PPC_ALTIVEC) {
571 reg.id = KVM_REG_PPC_VSCR;
572 reg.addr = (uintptr_t)&env->vscr;
573 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
574 if (ret < 0) {
da56ff91 575 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
70b79849
DG
576 return ret;
577 }
578
579 for (i = 0; i < 32; i++) {
580 reg.id = KVM_REG_PPC_VR(i);
581 reg.addr = (uintptr_t)&env->avr[i];
582 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
583 if (ret < 0) {
da56ff91 584 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
70b79849
DG
585 return ret;
586 }
587 }
588 }
589
590 return 0;
591}
592
593static int kvm_get_fp(CPUState *cs)
594{
595 PowerPCCPU *cpu = POWERPC_CPU(cs);
596 CPUPPCState *env = &cpu->env;
597 struct kvm_one_reg reg;
598 int i;
599 int ret;
600
601 if (env->insns_flags & PPC_FLOAT) {
602 uint64_t fpscr;
603 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
604
605 reg.id = KVM_REG_PPC_FPSCR;
606 reg.addr = (uintptr_t)&fpscr;
607 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
608 if (ret < 0) {
da56ff91 609 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
70b79849
DG
610 return ret;
611 } else {
612 env->fpscr = fpscr;
613 }
614
615 for (i = 0; i < 32; i++) {
616 uint64_t vsr[2];
617
618 reg.addr = (uintptr_t) &vsr;
619 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
620
621 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
622 if (ret < 0) {
da56ff91 623 DPRINTF("Unable to get %s%d from KVM: %s\n",
70b79849
DG
624 vsx ? "VSR" : "FPR", i, strerror(errno));
625 return ret;
626 } else {
627 env->fpr[i] = vsr[0];
628 if (vsx) {
629 env->vsr[i] = vsr[1];
630 }
631 }
632 }
633 }
634
635 if (env->insns_flags & PPC_ALTIVEC) {
636 reg.id = KVM_REG_PPC_VSCR;
637 reg.addr = (uintptr_t)&env->vscr;
638 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
639 if (ret < 0) {
da56ff91 640 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
70b79849
DG
641 return ret;
642 }
643
644 for (i = 0; i < 32; i++) {
645 reg.id = KVM_REG_PPC_VR(i);
646 reg.addr = (uintptr_t)&env->avr[i];
647 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
648 if (ret < 0) {
da56ff91 649 DPRINTF("Unable to get VR%d from KVM: %s\n",
70b79849
DG
650 i, strerror(errno));
651 return ret;
652 }
653 }
654 }
655
656 return 0;
657}
658
9b00ea49
DG
659#if defined(TARGET_PPC64)
660static int kvm_get_vpa(CPUState *cs)
661{
662 PowerPCCPU *cpu = POWERPC_CPU(cs);
663 CPUPPCState *env = &cpu->env;
664 struct kvm_one_reg reg;
665 int ret;
666
667 reg.id = KVM_REG_PPC_VPA_ADDR;
668 reg.addr = (uintptr_t)&env->vpa_addr;
669 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
670 if (ret < 0) {
da56ff91 671 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
9b00ea49
DG
672 return ret;
673 }
674
675 assert((uintptr_t)&env->slb_shadow_size
676 == ((uintptr_t)&env->slb_shadow_addr + 8));
677 reg.id = KVM_REG_PPC_VPA_SLB;
678 reg.addr = (uintptr_t)&env->slb_shadow_addr;
679 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
680 if (ret < 0) {
da56ff91 681 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
9b00ea49
DG
682 strerror(errno));
683 return ret;
684 }
685
686 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
687 reg.id = KVM_REG_PPC_VPA_DTL;
688 reg.addr = (uintptr_t)&env->dtl_addr;
689 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
690 if (ret < 0) {
da56ff91 691 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
9b00ea49
DG
692 strerror(errno));
693 return ret;
694 }
695
696 return 0;
697}
698
699static int kvm_put_vpa(CPUState *cs)
700{
701 PowerPCCPU *cpu = POWERPC_CPU(cs);
702 CPUPPCState *env = &cpu->env;
703 struct kvm_one_reg reg;
704 int ret;
705
706 /* SLB shadow or DTL can't be registered unless a master VPA is
707 * registered. That means when restoring state, if a VPA *is*
708 * registered, we need to set that up first. If not, we need to
709 * deregister the others before deregistering the master VPA */
710 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
711
712 if (env->vpa_addr) {
713 reg.id = KVM_REG_PPC_VPA_ADDR;
714 reg.addr = (uintptr_t)&env->vpa_addr;
715 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
716 if (ret < 0) {
da56ff91 717 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
9b00ea49
DG
718 return ret;
719 }
720 }
721
722 assert((uintptr_t)&env->slb_shadow_size
723 == ((uintptr_t)&env->slb_shadow_addr + 8));
724 reg.id = KVM_REG_PPC_VPA_SLB;
725 reg.addr = (uintptr_t)&env->slb_shadow_addr;
726 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
727 if (ret < 0) {
da56ff91 728 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
9b00ea49
DG
729 return ret;
730 }
731
732 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
733 reg.id = KVM_REG_PPC_VPA_DTL;
734 reg.addr = (uintptr_t)&env->dtl_addr;
735 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
736 if (ret < 0) {
da56ff91 737 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
9b00ea49
DG
738 strerror(errno));
739 return ret;
740 }
741
742 if (!env->vpa_addr) {
743 reg.id = KVM_REG_PPC_VPA_ADDR;
744 reg.addr = (uintptr_t)&env->vpa_addr;
745 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
746 if (ret < 0) {
da56ff91 747 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
9b00ea49
DG
748 return ret;
749 }
750 }
751
752 return 0;
753}
754#endif /* TARGET_PPC64 */
755
20d695a9 756int kvm_arch_put_registers(CPUState *cs, int level)
d76d1650 757{
20d695a9
AF
758 PowerPCCPU *cpu = POWERPC_CPU(cs);
759 CPUPPCState *env = &cpu->env;
d76d1650
AJ
760 struct kvm_regs regs;
761 int ret;
762 int i;
763
1bc22652
AF
764 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
765 if (ret < 0) {
d76d1650 766 return ret;
1bc22652 767 }
d76d1650
AJ
768
769 regs.ctr = env->ctr;
770 regs.lr = env->lr;
da91a00f 771 regs.xer = cpu_read_xer(env);
d76d1650
AJ
772 regs.msr = env->msr;
773 regs.pc = env->nip;
774
775 regs.srr0 = env->spr[SPR_SRR0];
776 regs.srr1 = env->spr[SPR_SRR1];
777
778 regs.sprg0 = env->spr[SPR_SPRG0];
779 regs.sprg1 = env->spr[SPR_SPRG1];
780 regs.sprg2 = env->spr[SPR_SPRG2];
781 regs.sprg3 = env->spr[SPR_SPRG3];
782 regs.sprg4 = env->spr[SPR_SPRG4];
783 regs.sprg5 = env->spr[SPR_SPRG5];
784 regs.sprg6 = env->spr[SPR_SPRG6];
785 regs.sprg7 = env->spr[SPR_SPRG7];
786
90dc8812
SW
787 regs.pid = env->spr[SPR_BOOKE_PID];
788
d76d1650
AJ
789 for (i = 0;i < 32; i++)
790 regs.gpr[i] = env->gpr[i];
791
4bddaf55
AK
792 regs.cr = 0;
793 for (i = 0; i < 8; i++) {
794 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
795 }
796
1bc22652 797 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
d76d1650
AJ
798 if (ret < 0)
799 return ret;
800
70b79849
DG
801 kvm_put_fp(cs);
802
93dd5e85 803 if (env->tlb_dirty) {
1bc22652 804 kvm_sw_tlb_put(cpu);
93dd5e85
SW
805 env->tlb_dirty = false;
806 }
807
f1af19d7
DG
808 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
809 struct kvm_sregs sregs;
810
811 sregs.pvr = env->spr[SPR_PVR];
812
813 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
814
815 /* Sync SLB */
816#ifdef TARGET_PPC64
d83af167 817 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
f1af19d7 818 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
69b31b90
AK
819 if (env->slb[i].esid & SLB_ESID_V) {
820 sregs.u.s.ppc64.slb[i].slbe |= i;
821 }
f1af19d7
DG
822 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
823 }
824#endif
825
826 /* Sync SRs */
827 for (i = 0; i < 16; i++) {
828 sregs.u.s.ppc32.sr[i] = env->sr[i];
829 }
830
831 /* Sync BATs */
832 for (i = 0; i < 8; i++) {
ef8beb0e
AG
833 /* Beware. We have to swap upper and lower bits here */
834 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
835 | env->DBAT[1][i];
836 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
837 | env->IBAT[1][i];
f1af19d7
DG
838 }
839
1bc22652 840 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
f1af19d7
DG
841 if (ret) {
842 return ret;
843 }
844 }
845
846 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
d67d40ea
DG
847 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
848 }
f1af19d7 849
d67d40ea
DG
850 if (cap_one_reg) {
851 int i;
852
853 /* We deliberately ignore errors here, for kernels which have
854 * the ONE_REG calls, but don't support the specific
855 * registers, there's a reasonable chance things will still
856 * work, at least until we try to migrate. */
857 for (i = 0; i < 1024; i++) {
858 uint64_t id = env->spr_cb[i].one_reg_id;
859
860 if (id != 0) {
861 kvm_put_one_spr(cs, id, i);
862 }
f1af19d7 863 }
9b00ea49
DG
864
865#ifdef TARGET_PPC64
866 if (cap_papr) {
867 if (kvm_put_vpa(cs) < 0) {
da56ff91 868 DPRINTF("Warning: Unable to set VPA information to KVM\n");
9b00ea49
DG
869 }
870 }
98a8b524
AK
871
872 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
9b00ea49 873#endif /* TARGET_PPC64 */
f1af19d7
DG
874 }
875
d76d1650
AJ
876 return ret;
877}
878
20d695a9 879int kvm_arch_get_registers(CPUState *cs)
d76d1650 880{
20d695a9
AF
881 PowerPCCPU *cpu = POWERPC_CPU(cs);
882 CPUPPCState *env = &cpu->env;
d76d1650 883 struct kvm_regs regs;
ba5e5090 884 struct kvm_sregs sregs;
90dc8812 885 uint32_t cr;
138b38b6 886 int i, ret;
d76d1650 887
1bc22652 888 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
d76d1650
AJ
889 if (ret < 0)
890 return ret;
891
90dc8812
SW
892 cr = regs.cr;
893 for (i = 7; i >= 0; i--) {
894 env->crf[i] = cr & 15;
895 cr >>= 4;
896 }
ba5e5090 897
d76d1650
AJ
898 env->ctr = regs.ctr;
899 env->lr = regs.lr;
da91a00f 900 cpu_write_xer(env, regs.xer);
d76d1650
AJ
901 env->msr = regs.msr;
902 env->nip = regs.pc;
903
904 env->spr[SPR_SRR0] = regs.srr0;
905 env->spr[SPR_SRR1] = regs.srr1;
906
907 env->spr[SPR_SPRG0] = regs.sprg0;
908 env->spr[SPR_SPRG1] = regs.sprg1;
909 env->spr[SPR_SPRG2] = regs.sprg2;
910 env->spr[SPR_SPRG3] = regs.sprg3;
911 env->spr[SPR_SPRG4] = regs.sprg4;
912 env->spr[SPR_SPRG5] = regs.sprg5;
913 env->spr[SPR_SPRG6] = regs.sprg6;
914 env->spr[SPR_SPRG7] = regs.sprg7;
915
90dc8812
SW
916 env->spr[SPR_BOOKE_PID] = regs.pid;
917
d76d1650
AJ
918 for (i = 0;i < 32; i++)
919 env->gpr[i] = regs.gpr[i];
920
70b79849
DG
921 kvm_get_fp(cs);
922
90dc8812 923 if (cap_booke_sregs) {
1bc22652 924 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
90dc8812
SW
925 if (ret < 0) {
926 return ret;
927 }
928
929 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
930 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
931 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
932 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
933 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
934 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
935 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
936 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
937 env->spr[SPR_DECR] = sregs.u.e.dec;
938 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
939 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
940 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
941 }
942
943 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
944 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
945 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
946 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
947 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
948 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
949 }
950
951 if (sregs.u.e.features & KVM_SREGS_E_64) {
952 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
953 }
954
955 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
956 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
957 }
958
959 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
960 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
961 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
962 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
963 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
964 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
965 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
966 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
967 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
968 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
969 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
970 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
971 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
972 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
973 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
974 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
975 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
976
977 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
978 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
979 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
980 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
981 }
982
983 if (sregs.u.e.features & KVM_SREGS_E_PM) {
984 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
985 }
986
987 if (sregs.u.e.features & KVM_SREGS_E_PC) {
988 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
989 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
990 }
991 }
992
993 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
994 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
995 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
996 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
997 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
998 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
999 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1000 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1001 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1002 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1003 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1004 }
1005
1006 if (sregs.u.e.features & KVM_SREGS_EXP) {
1007 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1008 }
1009
1010 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1011 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1012 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1013 }
1014
1015 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1016 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1017 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1018 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1019
1020 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1021 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1022 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1023 }
1024 }
fafc0b6a 1025 }
90dc8812 1026
90dc8812 1027 if (cap_segstate) {
1bc22652 1028 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
90dc8812
SW
1029 if (ret < 0) {
1030 return ret;
1031 }
1032
f3c75d42
AK
1033 if (!env->external_htab) {
1034 ppc_store_sdr1(env, sregs.u.s.sdr1);
1035 }
ba5e5090
AG
1036
1037 /* Sync SLB */
82c09f2f 1038#ifdef TARGET_PPC64
4b4d4a21
AK
1039 /*
1040 * The packed SLB array we get from KVM_GET_SREGS only contains
1041 * information about valid entries. So we flush our internal
1042 * copy to get rid of stale ones, then put all valid SLB entries
1043 * back in.
1044 */
1045 memset(env->slb, 0, sizeof(env->slb));
d83af167 1046 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
4b4d4a21
AK
1047 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1048 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1049 /*
1050 * Only restore valid entries
1051 */
1052 if (rb & SLB_ESID_V) {
1053 ppc_store_slb(env, rb, rs);
1054 }
ba5e5090 1055 }
82c09f2f 1056#endif
ba5e5090
AG
1057
1058 /* Sync SRs */
1059 for (i = 0; i < 16; i++) {
1060 env->sr[i] = sregs.u.s.ppc32.sr[i];
1061 }
1062
1063 /* Sync BATs */
1064 for (i = 0; i < 8; i++) {
1065 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1066 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1067 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1068 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1069 }
fafc0b6a 1070 }
ba5e5090 1071
d67d40ea
DG
1072 if (cap_hior) {
1073 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1074 }
1075
1076 if (cap_one_reg) {
1077 int i;
1078
1079 /* We deliberately ignore errors here, for kernels which have
1080 * the ONE_REG calls, but don't support the specific
1081 * registers, there's a reasonable chance things will still
1082 * work, at least until we try to migrate. */
1083 for (i = 0; i < 1024; i++) {
1084 uint64_t id = env->spr_cb[i].one_reg_id;
1085
1086 if (id != 0) {
1087 kvm_get_one_spr(cs, id, i);
1088 }
1089 }
9b00ea49
DG
1090
1091#ifdef TARGET_PPC64
1092 if (cap_papr) {
1093 if (kvm_get_vpa(cs) < 0) {
da56ff91 1094 DPRINTF("Warning: Unable to get VPA information from KVM\n");
9b00ea49
DG
1095 }
1096 }
98a8b524
AK
1097
1098 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
9b00ea49 1099#endif
d67d40ea
DG
1100 }
1101
d76d1650
AJ
1102 return 0;
1103}
1104
1bc22652 1105int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
fc87e185
AG
1106{
1107 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1108
1109 if (irq != PPC_INTERRUPT_EXT) {
1110 return 0;
1111 }
1112
1113 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1114 return 0;
1115 }
1116
1bc22652 1117 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
fc87e185
AG
1118
1119 return 0;
1120}
1121
16415335
AG
/*
 * Pick the bit index within env->irq_input_state that kvm_arch_pre_run()
 * tests before injecting an interrupt; it depends on the target flavour.
 */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1129
20d695a9 1130void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
d76d1650 1131{
20d695a9
AF
1132 PowerPCCPU *cpu = POWERPC_CPU(cs);
1133 CPUPPCState *env = &cpu->env;
d76d1650
AJ
1134 int r;
1135 unsigned irq;
1136
5cbdb3a3 1137 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
d76d1650 1138 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
fc87e185
AG
1139 if (!cap_interrupt_level &&
1140 run->ready_for_interrupt_injection &&
259186a7 1141 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
16415335 1142 (env->irq_input_state & (1<<PPC_INPUT_INT)))
d76d1650
AJ
1143 {
1144 /* For now KVM disregards the 'irq' argument. However, in the
1145 * future KVM could cache it in-kernel to avoid a heavyweight exit
1146 * when reading the UIC.
1147 */
fc87e185 1148 irq = KVM_INTERRUPT_SET;
d76d1650 1149
da56ff91 1150 DPRINTF("injected interrupt %d\n", irq);
1bc22652 1151 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
55e5c285
AF
1152 if (r < 0) {
1153 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1154 }
c821c2bd
AG
1155
1156 /* Always wake up soon in case the interrupt was level based */
bc72ad67 1157 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
c821c2bd 1158 (get_ticks_per_sec() / 50));
d76d1650
AJ
1159 }
1160
1161 /* We don't know if there are more interrupts pending after this. However,
1162 * the guest will return to userspace in the course of handling this one
1163 * anyways, so we will get a chance to deliver the rest. */
d76d1650
AJ
1164}
1165
20d695a9 1166void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
d76d1650 1167{
d76d1650
AJ
1168}
1169
20d695a9 1170int kvm_arch_process_async_events(CPUState *cs)
0af691d7 1171{
259186a7 1172 return cs->halted;
0af691d7
MT
1173}
1174
259186a7 1175static int kvmppc_handle_halt(PowerPCCPU *cpu)
d76d1650 1176{
259186a7
AF
1177 CPUState *cs = CPU(cpu);
1178 CPUPPCState *env = &cpu->env;
1179
1180 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1181 cs->halted = 1;
27103424 1182 cs->exception_index = EXCP_HLT;
d76d1650
AJ
1183 }
1184
bb4ea393 1185 return 0;
d76d1650
AJ
1186}
1187
1188/* map dcr access to existing qemu dcr emulation */
1328c2bf 1189static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
d76d1650
AJ
1190{
1191 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1192 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1193
bb4ea393 1194 return 0;
d76d1650
AJ
1195}
1196
1328c2bf 1197static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
d76d1650
AJ
1198{
1199 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1200 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1201
bb4ea393 1202 return 0;
d76d1650
AJ
1203}
1204
20d695a9 1205int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
d76d1650 1206{
20d695a9
AF
1207 PowerPCCPU *cpu = POWERPC_CPU(cs);
1208 CPUPPCState *env = &cpu->env;
bb4ea393 1209 int ret;
d76d1650
AJ
1210
1211 switch (run->exit_reason) {
1212 case KVM_EXIT_DCR:
1213 if (run->dcr.is_write) {
da56ff91 1214 DPRINTF("handle dcr write\n");
d76d1650
AJ
1215 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1216 } else {
da56ff91 1217 DPRINTF("handle dcr read\n");
d76d1650
AJ
1218 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1219 }
1220 break;
1221 case KVM_EXIT_HLT:
da56ff91 1222 DPRINTF("handle halt\n");
259186a7 1223 ret = kvmppc_handle_halt(cpu);
d76d1650 1224 break;
c6304a4a 1225#if defined(TARGET_PPC64)
f61b4bed 1226 case KVM_EXIT_PAPR_HCALL:
da56ff91 1227 DPRINTF("handle PAPR hypercall\n");
20d695a9 1228 run->papr_hcall.ret = spapr_hypercall(cpu,
aa100fa4 1229 run->papr_hcall.nr,
f61b4bed 1230 run->papr_hcall.args);
78e8fde2 1231 ret = 0;
f61b4bed
AG
1232 break;
1233#endif
5b95b8b9 1234 case KVM_EXIT_EPR:
da56ff91 1235 DPRINTF("handle epr\n");
933b19ea 1236 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
5b95b8b9
AG
1237 ret = 0;
1238 break;
31f2cb8f 1239 case KVM_EXIT_WATCHDOG:
da56ff91 1240 DPRINTF("handle watchdog expiry\n");
31f2cb8f
BB
1241 watchdog_perform_action();
1242 ret = 0;
1243 break;
1244
73aaec4a
JK
1245 default:
1246 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1247 ret = -1;
1248 break;
d76d1650
AJ
1249 }
1250
1251 return ret;
1252}
1253
31f2cb8f
BB
1254int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1255{
1256 CPUState *cs = CPU(cpu);
1257 uint32_t bits = tsr_bits;
1258 struct kvm_one_reg reg = {
1259 .id = KVM_REG_PPC_OR_TSR,
1260 .addr = (uintptr_t) &bits,
1261 };
1262
1263 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1264}
1265
1266int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1267{
1268
1269 CPUState *cs = CPU(cpu);
1270 uint32_t bits = tsr_bits;
1271 struct kvm_one_reg reg = {
1272 .id = KVM_REG_PPC_CLEAR_TSR,
1273 .addr = (uintptr_t) &bits,
1274 };
1275
1276 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1277}
1278
1279int kvmppc_set_tcr(PowerPCCPU *cpu)
1280{
1281 CPUState *cs = CPU(cpu);
1282 CPUPPCState *env = &cpu->env;
1283 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1284
1285 struct kvm_one_reg reg = {
1286 .id = KVM_REG_PPC_TCR,
1287 .addr = (uintptr_t) &tcr,
1288 };
1289
1290 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1291}
1292
1293int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1294{
1295 CPUState *cs = CPU(cpu);
31f2cb8f
BB
1296 int ret;
1297
1298 if (!kvm_enabled()) {
1299 return -1;
1300 }
1301
1302 if (!cap_ppc_watchdog) {
1303 printf("warning: KVM does not support watchdog");
1304 return -1;
1305 }
1306
48add816 1307 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
31f2cb8f
BB
1308 if (ret < 0) {
1309 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1310 __func__, strerror(-ret));
1311 return ret;
1312 }
1313
1314 return ret;
1315}
1316
dc333cd6
AG
/*
 * Find the first line of /proc/cpuinfo that starts with 'field' and copy
 * it (the whole line, bounded by 'len') into 'value'.
 *
 * Returns 0 when a matching line was found, -1 otherwise (including when
 * /proc/cpuinfo cannot be opened).
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int ret = -1;
    FILE *f = fopen("/proc/cpuinfo", "r");

    if (!f) {
        return -1;
    }

    while (fgets(line, sizeof(line), f)) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
        /* Stop on a line that reads back as an empty string */
        if (*line == '\0') {
            break;
        }
    }

    fclose(f);

    return ret;
}
1344
/*
 * Return the host timebase frequency parsed from the "timebase" line of
 * /proc/cpuinfo (the number following the ':').  Falls back to
 * get_ticks_per_sec() when the line is missing or has no ':'.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *colon;
    uint32_t fallback = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line)) != 0) {
        return fallback;
    }

    colon = strchr(line, ':');
    if (colon == NULL) {
        return fallback;
    }

    /* The value starts right after the colon */
    return atoi(colon + 1);
}
4513d923 1364
eadaada1
AG
1365/* Try to find a device tree node for a CPU with clock-frequency property */
1366static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1367{
1368 struct dirent *dirp;
1369 DIR *dp;
1370
1371 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1372 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1373 return -1;
1374 }
1375
1376 buf[0] = '\0';
1377 while ((dirp = readdir(dp)) != NULL) {
1378 FILE *f;
1379 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1380 dirp->d_name);
1381 f = fopen(buf, "r");
1382 if (f) {
1383 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1384 fclose(f);
1385 break;
1386 }
1387 buf[0] = '\0';
1388 }
1389 closedir(dp);
1390 if (buf[0] == '\0') {
1391 printf("Unknown host!\n");
1392 return -1;
1393 }
1394
1395 return 0;
1396}
1397
9bc884b7
DG
1398/* Read a CPU node property from the host device tree that's a single
1399 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1400 * (can't find or open the property, or doesn't understand the
1401 * format) */
1402static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
eadaada1 1403{
9bc884b7
DG
1404 char buf[PATH_MAX];
1405 union {
1406 uint32_t v32;
1407 uint64_t v64;
1408 } u;
eadaada1
AG
1409 FILE *f;
1410 int len;
1411
1412 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
9bc884b7 1413 return -1;
eadaada1
AG
1414 }
1415
9bc884b7
DG
1416 strncat(buf, "/", sizeof(buf) - strlen(buf));
1417 strncat(buf, propname, sizeof(buf) - strlen(buf));
eadaada1
AG
1418
1419 f = fopen(buf, "rb");
1420 if (!f) {
1421 return -1;
1422 }
1423
9bc884b7 1424 len = fread(&u, 1, sizeof(u), f);
eadaada1
AG
1425 fclose(f);
1426 switch (len) {
9bc884b7
DG
1427 case 4:
1428 /* property is a 32-bit quantity */
1429 return be32_to_cpu(u.v32);
1430 case 8:
1431 return be64_to_cpu(u.v64);
eadaada1
AG
1432 }
1433
1434 return 0;
1435}
1436
9bc884b7
DG
/* Return the "clock-frequency" property of the host CPU device tree node. */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
1441
6659394f
DG
/*
 * Return the "ibm,vmx" property of the host CPU device tree node,
 * converted to 32 bits (so a failed lookup reads back as (uint32_t)-1).
 */
uint32_t kvmppc_get_vmx(void)
{
    uint32_t vmx = kvmppc_read_int_cpu_dt("ibm,vmx");

    return vmx;
}
1446
/*
 * Return the "ibm,dfp" property of the host CPU device tree node,
 * converted to 32 bits (so a failed lookup reads back as (uint32_t)-1).
 */
uint32_t kvmppc_get_dfp(void)
{
    uint32_t dfp = kvmppc_read_int_cpu_dt("ibm,dfp");

    return dfp;
}
1451
1a61a9ae
SY
1452static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1453 {
1454 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1455 CPUState *cs = CPU(cpu);
1456
1457 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1458 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1459 return 0;
1460 }
1461
1462 return 1;
1463}
1464
1465int kvmppc_get_hasidle(CPUPPCState *env)
1466{
1467 struct kvm_ppc_pvinfo pvinfo;
1468
1469 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1470 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1471 return 1;
1472 }
1473
1474 return 0;
1475}
1476
1328c2bf 1477int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
45024f09
AG
1478{
1479 uint32_t *hc = (uint32_t*)buf;
45024f09
AG
1480 struct kvm_ppc_pvinfo pvinfo;
1481
1a61a9ae 1482 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
45024f09 1483 memcpy(buf, pvinfo.hcall, buf_len);
45024f09
AG
1484 return 0;
1485 }
45024f09
AG
1486
1487 /*
1488 * Fallback to always fail hypercalls:
1489 *
1490 * li r3, -1
1491 * nop
1492 * nop
1493 * nop
1494 */
1495
1496 hc[0] = 0x3860ffff;
1497 hc[1] = 0x60000000;
1498 hc[2] = 0x60000000;
1499 hc[3] = 0x60000000;
1500
1501 return 0;
1502}
1503
1bc22652 1504void kvmppc_set_papr(PowerPCCPU *cpu)
f61b4bed 1505{
1bc22652 1506 CPUState *cs = CPU(cpu);
f61b4bed
AG
1507 int ret;
1508
48add816 1509 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
f61b4bed 1510 if (ret) {
a47dddd7 1511 cpu_abort(cs, "This KVM version does not support PAPR\n");
94135e81 1512 }
9b00ea49
DG
1513
1514 /* Update the capability flag so we sync the right information
1515 * with kvm */
1516 cap_papr = 1;
f61b4bed
AG
1517}
1518
6db5bb0f
AK
1519int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
1520{
1521 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
1522}
1523
5b95b8b9
AG
1524void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1525{
5b95b8b9 1526 CPUState *cs = CPU(cpu);
5b95b8b9
AG
1527 int ret;
1528
48add816 1529 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
5b95b8b9 1530 if (ret && mpic_proxy) {
a47dddd7 1531 cpu_abort(cs, "This KVM version does not support EPR\n");
5b95b8b9
AG
1532 }
1533}
1534
e97c3636
DG
1535int kvmppc_smt_threads(void)
1536{
1537 return cap_ppc_smt ? cap_ppc_smt : 1;
1538}
1539
7f763a5d 1540#ifdef TARGET_PPC64
354ac20a
DG
1541off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1542{
1543 void *rma;
1544 off_t size;
1545 int fd;
1546 struct kvm_allocate_rma ret;
1547 MemoryRegion *rma_region;
1548
1549 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1550 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1551 * not necessary on this hardware
1552 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1553 *
1554 * FIXME: We should allow the user to force contiguous RMA
1555 * allocation in the cap_ppc_rma==1 case.
1556 */
1557 if (cap_ppc_rma < 2) {
1558 return 0;
1559 }
1560
1561 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1562 if (fd < 0) {
1563 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1564 strerror(errno));
1565 return -1;
1566 }
1567
1568 size = MIN(ret.rma_size, 256ul << 20);
1569
1570 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1571 if (rma == MAP_FAILED) {
1572 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1573 return -1;
1574 };
1575
1576 rma_region = g_new(MemoryRegion, 1);
2c9b15ca 1577 memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
6148b23d 1578 vmstate_register_ram_global(rma_region);
354ac20a
DG
1579 memory_region_add_subregion(sysmem, 0, rma_region);
1580
1581 return size;
1582}
1583
7f763a5d
DG
1584uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1585{
f36951c1
DG
1586 struct kvm_ppc_smmu_info info;
1587 long rampagesize, best_page_shift;
1588 int i;
1589
7f763a5d
DG
1590 if (cap_ppc_rma >= 2) {
1591 return current_size;
1592 }
f36951c1
DG
1593
1594 /* Find the largest hardware supported page size that's less than
1595 * or equal to the (logical) backing page size of guest RAM */
182735ef 1596 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
f36951c1
DG
1597 rampagesize = getrampagesize();
1598 best_page_shift = 0;
1599
1600 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1601 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
1602
1603 if (!sps->page_shift) {
1604 continue;
1605 }
1606
1607 if ((sps->page_shift > best_page_shift)
1608 && ((1UL << sps->page_shift) <= rampagesize)) {
1609 best_page_shift = sps->page_shift;
1610 }
1611 }
1612
7f763a5d 1613 return MIN(current_size,
f36951c1 1614 1ULL << (best_page_shift + hash_shift - 7));
7f763a5d
DG
1615}
1616#endif
1617
da95324e
AK
1618bool kvmppc_spapr_use_multitce(void)
1619{
1620 return cap_spapr_multitce;
1621}
1622
0f5cb298
DG
1623void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1624{
1625 struct kvm_create_spapr_tce args = {
1626 .liobn = liobn,
1627 .window_size = window_size,
1628 };
1629 long len;
1630 int fd;
1631 void *table;
1632
b5aec396
DG
1633 /* Must set fd to -1 so we don't try to munmap when called for
1634 * destroying the table, which the upper layers -will- do
1635 */
1636 *pfd = -1;
0f5cb298
DG
1637 if (!cap_spapr_tce) {
1638 return NULL;
1639 }
1640
1641 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1642 if (fd < 0) {
b5aec396
DG
1643 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1644 liobn);
0f5cb298
DG
1645 return NULL;
1646 }
1647
a83000f5 1648 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
0f5cb298
DG
1649 /* FIXME: round this up to page size */
1650
74b41e56 1651 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
0f5cb298 1652 if (table == MAP_FAILED) {
b5aec396
DG
1653 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1654 liobn);
0f5cb298
DG
1655 close(fd);
1656 return NULL;
1657 }
1658
1659 *pfd = fd;
1660 return table;
1661}
1662
/*
 * Tear down a TCE table created by kvmppc_create_spapr_tce().
 *
 * 'nb_table' is the number of 64-bit entries in the mapping.  Returns -1
 * when 'fd' is negative (no KVM table exists) and 0 otherwise; unmap or
 * close failures are reported on stderr but not propagated.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;
    int err = 0;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if (munmap(table, len) < 0) {
        /* Leak the table */
        err = errno;
    }

    /* Close the descriptor even if the unmap failed, so it is not leaked */
    if (close(fd) < 0 && err == 0) {
        err = errno;
    }

    if (err != 0) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(err));
    }

    return 0;
}
1681
7f763a5d
DG
1682int kvmppc_reset_htab(int shift_hint)
1683{
1684 uint32_t shift = shift_hint;
1685
ace9a2cb
DG
1686 if (!kvm_enabled()) {
1687 /* Full emulation, tell caller to allocate htab itself */
1688 return 0;
1689 }
1690 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
7f763a5d
DG
1691 int ret;
1692 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
ace9a2cb
DG
1693 if (ret == -ENOTTY) {
1694 /* At least some versions of PR KVM advertise the
1695 * capability, but don't implement the ioctl(). Oops.
1696 * Return 0 so that we allocate the htab in qemu, as is
1697 * correct for PR. */
1698 return 0;
1699 } else if (ret < 0) {
7f763a5d
DG
1700 return ret;
1701 }
1702 return shift;
1703 }
1704
ace9a2cb
DG
1705 /* We have a kernel that predates the htab reset calls. For PR
1706 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1707 * this era, it has allocated a 16MB fixed size hash table
1708 * already. Kernels of this era have the GET_PVINFO capability
1709 * only on PR, so we use this hack to determine the right
1710 * answer */
1711 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1712 /* PR - tell caller to allocate htab */
1713 return 0;
1714 } else {
1715 /* HV - assume 16MB kernel allocated htab */
1716 return 24;
1717 }
7f763a5d
DG
1718}
1719
a1e98583
DG
1720static inline uint32_t mfpvr(void)
1721{
1722 uint32_t pvr;
1723
1724 asm ("mfpvr %0"
1725 : "=r"(pvr));
1726 return pvr;
1727}
1728
a7342588
DG
/* Set ('on' true) or clear ('on' false) the bits of 'flags' in *word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
1737
2985b86b 1738static void kvmppc_host_cpu_initfn(Object *obj)
a1e98583 1739{
2985b86b
AF
1740 assert(kvm_enabled());
1741}
1742
1743static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1744{
1745 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
a7342588
DG
1746 uint32_t vmx = kvmppc_get_vmx();
1747 uint32_t dfp = kvmppc_get_dfp();
0cbad81f
DG
1748 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
1749 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
a1e98583 1750
cfe34f44 1751 /* Now fix up the class with information we can query from the host */
3bc9ccc0 1752 pcc->pvr = mfpvr();
a7342588 1753
70bca53f
AG
1754 if (vmx != -1) {
1755 /* Only override when we know what the host supports */
cfe34f44
AF
1756 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1757 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
70bca53f
AG
1758 }
1759 if (dfp != -1) {
1760 /* Only override when we know what the host supports */
cfe34f44 1761 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
70bca53f 1762 }
0cbad81f
DG
1763
1764 if (dcache_size != -1) {
1765 pcc->l1_dcache_size = dcache_size;
1766 }
1767
1768 if (icache_size != -1) {
1769 pcc->l1_icache_size = icache_size;
1770 }
a1e98583
DG
1771}
1772
3b961124
SY
1773bool kvmppc_has_cap_epr(void)
1774{
1775 return cap_epr;
1776}
1777
7c43bca0
AK
1778bool kvmppc_has_cap_htab_fd(void)
1779{
1780 return cap_htab_fd;
1781}
1782
5b79b1ca
AK
1783static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
1784{
1785 ObjectClass *oc = OBJECT_CLASS(pcc);
1786
1787 while (oc && !object_class_is_abstract(oc)) {
1788 oc = object_class_get_parent(oc);
1789 }
1790 assert(oc);
1791
1792 return POWERPC_CPU_CLASS(oc);
1793}
1794
5ba4576b
AF
1795static int kvm_ppc_register_host_cpu_type(void)
1796{
1797 TypeInfo type_info = {
1798 .name = TYPE_HOST_POWERPC_CPU,
1799 .instance_init = kvmppc_host_cpu_initfn,
1800 .class_init = kvmppc_host_cpu_class_init,
1801 };
1802 uint32_t host_pvr = mfpvr();
1803 PowerPCCPUClass *pvr_pcc;
5b79b1ca 1804 DeviceClass *dc;
5ba4576b
AF
1805
1806 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
3bc9ccc0
AK
1807 if (pvr_pcc == NULL) {
1808 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
1809 }
5ba4576b
AF
1810 if (pvr_pcc == NULL) {
1811 return -1;
1812 }
1813 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1814 type_register(&type_info);
5b79b1ca
AK
1815
1816 /* Register generic family CPU class for a family */
1817 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
1818 dc = DEVICE_CLASS(pvr_pcc);
1819 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1820 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
1821 type_register(&type_info);
1822
5ba4576b
AF
1823 return 0;
1824}
1825
feaa64c4
DG
1826int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
1827{
1828 struct kvm_rtas_token_args args = {
1829 .token = token,
1830 };
1831
1832 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
1833 return -ENOENT;
1834 }
1835
1836 strncpy(args.name, function, sizeof(args.name));
1837
1838 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
1839}
12b1143b 1840
e68cb8b4
AK
1841int kvmppc_get_htab_fd(bool write)
1842{
1843 struct kvm_get_htab_fd s = {
1844 .flags = write ? KVM_GET_HTAB_WRITE : 0,
1845 .start_index = 0,
1846 };
1847
1848 if (!cap_htab_fd) {
1849 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
1850 return -1;
1851 }
1852
1853 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
1854}
1855
1856int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
1857{
bc72ad67 1858 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
e68cb8b4
AK
1859 uint8_t buf[bufsize];
1860 ssize_t rc;
1861
1862 do {
1863 rc = read(fd, buf, bufsize);
1864 if (rc < 0) {
1865 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
1866 strerror(errno));
1867 return rc;
1868 } else if (rc) {
1869 /* Kernel already retuns data in BE format for the file */
1870 qemu_put_buffer(f, buf, rc);
1871 }
1872 } while ((rc != 0)
1873 && ((max_ns < 0)
bc72ad67 1874 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
e68cb8b4
AK
1875
1876 return (rc == 0) ? 1 : 0;
1877}
1878
1879int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
1880 uint16_t n_valid, uint16_t n_invalid)
1881{
1882 struct kvm_get_htab_header *buf;
1883 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
1884 ssize_t rc;
1885
1886 buf = alloca(chunksize);
1887 /* This is KVM on ppc, so this is all big-endian */
1888 buf->index = index;
1889 buf->n_valid = n_valid;
1890 buf->n_invalid = n_invalid;
1891
1892 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
1893
1894 rc = write(fd, buf, chunksize);
1895 if (rc < 0) {
1896 fprintf(stderr, "Error writing KVM hash table: %s\n",
1897 strerror(errno));
1898 return rc;
1899 }
1900 if (rc != chunksize) {
1901 /* We should never get a short write on a single chunk */
1902 fprintf(stderr, "Short write, restoring KVM hash table\n");
1903 return -1;
1904 }
1905 return 0;
1906}
1907
20d695a9 1908bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
4513d923
GN
1909{
1910 return true;
1911}
a1b87fe0 1912
20d695a9 1913int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
a1b87fe0
JK
1914{
1915 return 1;
1916}
1917
/* Arch hook for SIGBUS: not handled, always returns 1. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    const int unhandled = 1;

    return unhandled;
}
82169660
SW
1922
1923void kvm_arch_init_irq_routing(KVMState *s)
1924{
1925}
c65f9a07
GK
1926
1927int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1928{
1929 return -EINVAL;
1930}
1931
1932int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1933{
1934 return -EINVAL;
1935}
1936
1937int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1938{
1939 return -EINVAL;
1940}
1941
1942int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1943{
1944 return -EINVAL;
1945}
1946
/* No hardware breakpoints are ever installed, so there is nothing to remove. */
void kvm_arch_remove_all_hw_breakpoints(void)
{
    /* intentionally empty */
}
1950
1951void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
1952{
1953}
7c43bca0
AK
1954
1955struct kvm_get_htab_buf {
1956 struct kvm_get_htab_header header;
1957 /*
1958 * We require one extra byte for read
1959 */
1960 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
1961};
1962
1963uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
1964{
1965 int htab_fd;
1966 struct kvm_get_htab_fd ghf;
1967 struct kvm_get_htab_buf *hpte_buf;
1968
1969 ghf.flags = 0;
1970 ghf.start_index = pte_index;
1971 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
1972 if (htab_fd < 0) {
1973 goto error_out;
1974 }
1975
1976 hpte_buf = g_malloc0(sizeof(*hpte_buf));
1977 /*
1978 * Read the hpte group
1979 */
1980 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
1981 goto out_close;
1982 }
1983
1984 close(htab_fd);
1985 return (uint64_t)(uintptr_t) hpte_buf->hpte;
1986
1987out_close:
1988 g_free(hpte_buf);
1989 close(htab_fd);
1990error_out:
1991 return 0;
1992}
1993
1994void kvmppc_hash64_free_pteg(uint64_t token)
1995{
1996 struct kvm_get_htab_buf *htab_buf;
1997
1998 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
1999 hpte);
2000 g_free(htab_buf);
2001 return;
2002}
c1385933
AK
2003
2004void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2005 target_ulong pte0, target_ulong pte1)
2006{
2007 int htab_fd;
2008 struct kvm_get_htab_fd ghf;
2009 struct kvm_get_htab_buf hpte_buf;
2010
2011 ghf.flags = 0;
2012 ghf.start_index = 0; /* Ignored */
2013 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2014 if (htab_fd < 0) {
2015 goto error_out;
2016 }
2017
2018 hpte_buf.header.n_valid = 1;
2019 hpte_buf.header.n_invalid = 0;
2020 hpte_buf.header.index = pte_index;
2021 hpte_buf.hpte[0] = pte0;
2022 hpte_buf.hpte[1] = pte1;
2023 /*
2024 * Write the hpte entry.
2025 * CAUTION: write() has the warn_unused_result attribute. Hence we
2026 * need to check the return value, even though we do nothing.
2027 */
2028 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2029 goto out_close;
2030 }
2031
2032out_close:
2033 close(htab_fd);
2034 return;
2035
2036error_out:
2037 return;
2038}