/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <dirent.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
#include "cpu.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/ppc/ppc.h"
#include "sysemu/watchdog.h"
#include "trace.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes the interrupt but ignores it, goes to sleep and never gets
 *     notified that there's still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}

static int kvm_ppc_register_host_cpu_type(void);

int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = false;
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}

static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users into thinking they
           can run BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}
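/*
 * A rough illustration of the sharing this sets up (the TLB geometry is a
 * model-specific assumption here): an e500mc-style MMU with a 512-entry
 * TLB0 and a 64-entry TLB1 gives entries == 576, so KVM and QEMU end up
 * reading and writing the guest TLB in the same 576-element ppcmas_tlb_t
 * array at env->tlb.tlbm instead of copying it on every exit.
 */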


#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 */
        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}

static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}

static long getrampagesize(void)
{
    struct statfs fs;
    int ret;

    if (!mem_path) {
        /* guest RAM is backed by normal anonymous pages */
        return getpagesize();
    }

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's backed by ordinary pages */
        return getpagesize();
    }

    /* It's hugetlbfs, so return the huge page size */
    return fs.f_bsize;
}
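/*
 * Worked example (host setup assumed for illustration): when QEMU is
 * started with "-mem-path /dev/hugepages" and hugetlbfs is mounted there
 * with 16M pages, statfs() reports f_type == HUGETLBFS_MAGIC and
 * f_bsize == 0x1000000, so this returns 16M. Without -mem-path, guest
 * RAM is anonymous memory and the host's base page size is returned.
 */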

static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}
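/*
 * For instance, under HV KVM (KVM_PPC_PAGE_SIZES_REAL set) with 16M
 * backing pages (rampgsize == 1 << 24), a 16M page size (shift == 24)
 * is accepted but a 16G page size (shift == 34) is rejected: a guest
 * page must fit within a single backing page.
 */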

static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    /*
     * XXX This loop should be an entry wide AND of the capabilities that
     *     the selected CPU has with the capabilities that KVM supports.
     */
    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

#endif /* !defined (TARGET_PPC64) */

unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
}

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        return ret;
    }

    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);
        break;
    default:
        break;
    }

    return ret;
}

static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}

static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_get(spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}

static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}
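/*
 * These two helpers wrap the generic ONE_REG interface: the width of the
 * transfer is encoded in the register id itself via KVM_REG_SIZE_MASK,
 * so callers just pass an id and an SPR number, e.g.
 * kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR) as done further down.
 * Failures are traced rather than fatal, since older kernels may lack
 * individual registers.
 */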

static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

#if defined(TARGET_PPC64)
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}

static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered. That means when restoring state, if a VPA *is*
     * registered, we need to set that up first. If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */

int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            if (env->slb[i].esid & SLB_ESID_V) {
                sregs.u.s.ppc64.slb[i].slbe |= i;
            }
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            /* Beware. We have to swap upper and lower bits here */
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
                | env->DBAT[1][i];
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
                | env->IBAT[1][i];
        }

        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
        if (ret) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (msr_ts) {
            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            }
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            }
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
        }

        if (cap_papr) {
            if (kvm_put_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to set VPA information to KVM\n");
            }
        }

        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif /* TARGET_PPC64 */
    }

    return ret;
}

int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    kvm_get_fp(cs);

    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (!env->external_htab) {
            ppc_store_sdr1(env, sregs.u.s.sdr1);
        }

        /* Sync SLB */
#ifdef TARGET_PPC64
        /*
         * The packed SLB array we get from KVM_GET_SREGS only contains
         * information about valid entries. So we flush our internal
         * copy to get rid of stale ones, then put all valid SLB entries
         * back in.
         */
        memset(env->slb, 0, sizeof(env->slb));
        for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
            target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
            target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
            /*
             * Only restore valid entries
             */
            if (rb & SLB_ESID_V) {
                ppc_store_slb(env, rb, rs);
            }
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_get_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (msr_ts) {
            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            }
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            }
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
        }

        if (cap_papr) {
            if (kvm_get_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to get VPA information from KVM\n");
            }
        }

        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif
    }

    return 0;
}

int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}

#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1<<PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        DPRINTF("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                  (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */
}
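/*
 * The get_ticks_per_sec() / 50 deadline above is the 20 ms re-injection
 * interval described in the comment next to idle_timer at the top of
 * this file.
 */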

void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
}

int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}

static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        cs->exception_index = EXCP_HLT;
    }

    return 0;
}

/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}

int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (!kvm_enabled()) {
        return -1;
    }

    if (!cap_ppc_watchdog) {
        printf("warning: KVM does not support watchdog\n");
        return -1;
    }

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    return ret;
}

static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}

uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}
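/*
 * Illustration: on a typical ppc host, /proc/cpuinfo carries a line like
 * "timebase : 512000000"; read_cpuinfo() copies that line, strchr()
 * skips past the ':' and atoi() yields the 512 MHz timebase. If any of
 * that fails, the get_ticks_per_sec() default is kept as a fallback.
 */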

/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}

/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
 * (can't find or open the property, or doesn't understand the
 * format) */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX];
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    strncat(buf, "/", sizeof(buf) - strlen(buf));
    strncat(buf, propname, sizeof(buf) - strlen(buf));

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}
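/*
 * Example (the node name is host-specific): for propname
 * "clock-frequency" this might open
 * /proc/device-tree/cpus/PowerPC,POWER7@0/clock-frequency and read a
 * 4- or 8-byte big-endian integer, converted to host byte order with
 * be32_to_cpu()/be64_to_cpu().
 */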

uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}

static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
{
    PowerPCCPU *cpu = ppc_env_get_cpu(env);
    CPUState *cs = CPU(cpu);

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
        return 0;
    }

    return 1;
}

int kvmppc_get_hasidle(CPUPPCState *env)
{
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
        return 1;
    }

    return 0;
}

int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t *)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls regardless of endianness:
     *
     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
     *     li r3, -1
     *     b .+8       (becomes nop in wrong endian)
     *     bswap32(li r3, -1)
     */

    hc[0] = cpu_to_be32(0x08000048);
    hc[1] = cpu_to_be32(0x3860ffff);
    hc[2] = cpu_to_be32(0x48000008);
    hc[3] = cpu_to_be32(bswap32(0x3860ffff));

    return 0;
}
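/*
 * To see why the fallback is endian-proof, decode the first word both
 * ways: 0x08000048 is "tdi 0,r0,72" (a never-taken trap, i.e. a nop) in
 * the intended byte order, while byte-swapped it reads 0x48000008,
 * "b .+8". A same-endian guest thus runs nop; li r3,-1; b .+8, skipping
 * the swapped copy; a cross-endian guest sees "b .+8" first, lands on
 * the trap-as-nop, and falls through to the byte-swapped "li r3,-1".
 * Either way r3 becomes -1, the hypercall failure value.
 */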

void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
    if (ret) {
        cpu_abort(cs, "This KVM version does not support PAPR\n");
    }

    /* Update the capability flag so we sync the right information
     * with kvm */
    cap_papr = 1;
}

int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
{
    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
}

void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUState *cs = CPU(cpu);
    int ret;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
    if (ret && mpic_proxy) {
        cpu_abort(cs, "This KVM version does not support EPR\n");
    }
}

int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}

#ifdef TARGET_PPC64
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
    vmstate_register_ram_global(rma_region);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}

uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    if (cap_ppc_rma >= 2) {
        return current_size;
    }

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
    rampagesize = getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
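/*
 * Worked example of the clamp above: with 4K backing pages
 * (best_page_shift == 12) and a 16MB hash table (hash_shift == 24), the
 * RMA is limited to 1ULL << (12 + 24 - 7) == 512MB.
 */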
#endif

bool kvmppc_spapr_use_multitce(void)
{
    return cap_spapr_multitce;
}

void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
                              bool vfio_accel)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}

int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl(). Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls. For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table
     * already. Kernels of this era have the GET_PVINFO capability
     * only on PR, so we use this hack to determine the right
     * answer */
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

static void kvmppc_host_cpu_initfn(Object *obj)
{
    assert(kvm_enabled());
}

static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }
}

bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

bool kvmppc_has_cap_htab_fd(void)
{
    return cap_htab_fd;
}

bool kvmppc_has_cap_fixup_hcalls(void)
{
    return cap_fixup_hcalls;
}

static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
{
    ObjectClass *oc = OBJECT_CLASS(pcc);

    while (oc && !object_class_is_abstract(oc)) {
        oc = object_class_get_parent(oc);
    }
    assert(oc);

    return POWERPC_CPU_CLASS(oc);
}

static int kvm_ppc_register_host_cpu_type(void)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .instance_init = kvmppc_host_cpu_initfn,
        .class_init = kvmppc_host_cpu_class_init,
    };
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;
    DeviceClass *dc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);

    /* Register generic family CPU class for a family */
    pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
    dc = DEVICE_CLASS(pvr_pcc);
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
    type_register(&type_info);

    return 0;
}

int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
{
    struct kvm_rtas_token_args args = {
        .token = token,
    };

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
        return -ENOENT;
    }

    strncpy(args.name, function, sizeof(args.name));

    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}

int kvmppc_get_htab_fd(bool write)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = 0,
    };

    if (!cap_htab_fd) {
        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
        return -1;
    }

    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
}

int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            /* Kernel already returns data in BE format for the file */
            qemu_put_buffer(f, buf, rc);
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}

int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    /* This is KVM on ppc, so this is all big-endian */
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}
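/*
 * The migration stream written/consumed above mirrors the kernel's
 * KVM_PPC_GET_HTAB_FD format: each chunk is a kvm_get_htab_header
 * { index, n_valid, n_invalid } followed by n_valid HPTEs of
 * HASH_PTE_SIZE_64 (16) bytes each, all big-endian. A chunk with
 * n_valid == 8, for example, occupies sizeof(header) + 8 * 16 bytes.
 */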

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return 1;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}

int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
{
    return -EINVAL;
}

int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
{
    return -EINVAL;
}

int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
}

void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
{
}

struct kvm_get_htab_buf {
    struct kvm_get_htab_header header;
    /*
     * We require one extra byte for read
     */
    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
};
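/*
 * An HPTE group is 8 entries of two target_ulong words each, so hpte[]
 * holds a full group (16 words) plus one spare element to satisfy the
 * "one extra byte for read" requirement noted above.
 */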

uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
{
    int htab_fd;
    struct kvm_get_htab_fd ghf;
    struct kvm_get_htab_buf *hpte_buf;

    ghf.flags = 0;
    ghf.start_index = pte_index;
    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (htab_fd < 0) {
        goto error_out;
    }

    hpte_buf = g_malloc0(sizeof(*hpte_buf));
    /*
     * Read the hpte group
     */
    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
        goto out_close;
    }

    close(htab_fd);
    return (uint64_t)(uintptr_t) hpte_buf->hpte;

out_close:
    g_free(hpte_buf);
    close(htab_fd);
error_out:
    return 0;
}

void kvmppc_hash64_free_pteg(uint64_t token)
{
    struct kvm_get_htab_buf *htab_buf;

    htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
                            hpte);
    g_free(htab_buf);
    return;
}

void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
                             target_ulong pte0, target_ulong pte1)
{
    int htab_fd;
    struct kvm_get_htab_fd ghf;
    struct kvm_get_htab_buf hpte_buf;

    ghf.flags = 0;
    ghf.start_index = 0;     /* Ignored */
    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (htab_fd < 0) {
        goto error_out;
    }

    hpte_buf.header.n_valid = 1;
    hpte_buf.header.n_invalid = 0;
    hpte_buf.header.index = pte_index;
    hpte_buf.hpte[0] = pte0;
    hpte_buf.hpte[1] = pte1;
    /*
     * Write the hpte entry.
     * CAUTION: write() has the warn_unused_result attribute. Hence we
     * need to check the return value, even though we do nothing.
     */
    if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
        goto out_close;
    }

out_close:
    close(htab_fd);
    return;

error_out:
    return;
}