]> git.proxmox.com Git - mirror_qemu.git/blame - target-ppc/kvm.c
Replaced get_tick_per_sec() by NANOSECONDS_PER_SECOND
[mirror_qemu.git] / target-ppc / kvm.c
CommitLineData
d76d1650
AJ
1/*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
90dc8812 5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
d76d1650
AJ
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
0d75590d 17#include "qemu/osdep.h"
eadaada1 18#include <dirent.h>
d76d1650
AJ
19#include <sys/ioctl.h>
20#include <sys/mman.h>
4656e1f0 21#include <sys/vfs.h>
d76d1650
AJ
22
23#include <linux/kvm.h>
24
25#include "qemu-common.h"
072ed5f2 26#include "qemu/error-report.h"
1de7afc9 27#include "qemu/timer.h"
9c17d615
PB
28#include "sysemu/sysemu.h"
29#include "sysemu/kvm.h"
d76d1650
AJ
30#include "kvm_ppc.h"
31#include "cpu.h"
9c17d615
PB
32#include "sysemu/cpus.h"
33#include "sysemu/device_tree.h"
d5aea6f3 34#include "mmu-hash64.h"
d76d1650 35
f61b4bed 36#include "hw/sysbus.h"
0d09e41a
PB
37#include "hw/ppc/spapr.h"
38#include "hw/ppc/spapr_vio.h"
98a8b524 39#include "hw/ppc/ppc.h"
31f2cb8f 40#include "sysemu/watchdog.h"
b36f100e 41#include "trace.h"
88365d17 42#include "exec/gdbstub.h"
4c663752 43#include "exec/memattrs.h"
2d103aae 44#include "sysemu/hostmem.h"
f61b4bed 45
d76d1650
AJ
46//#define DEBUG_KVM
47
48#ifdef DEBUG_KVM
da56ff91 49#define DPRINTF(fmt, ...) \
d76d1650
AJ
50 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
51#else
da56ff91 52#define DPRINTF(fmt, ...) \
d76d1650
AJ
53 do { } while (0)
54#endif
55
eadaada1
AG
56#define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
57
94a8d39a
JK
58const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
59 KVM_CAP_LAST_INFO
60};
61
fc87e185
AG
62static int cap_interrupt_unset = false;
63static int cap_interrupt_level = false;
90dc8812 64static int cap_segstate;
90dc8812 65static int cap_booke_sregs;
e97c3636 66static int cap_ppc_smt;
354ac20a 67static int cap_ppc_rma;
0f5cb298 68static int cap_spapr_tce;
da95324e 69static int cap_spapr_multitce;
9bb62a07 70static int cap_spapr_vfio;
f1af19d7 71static int cap_hior;
d67d40ea 72static int cap_one_reg;
3b961124 73static int cap_epr;
31f2cb8f 74static int cap_ppc_watchdog;
9b00ea49 75static int cap_papr;
e68cb8b4 76static int cap_htab_fd;
87a91de6 77static int cap_fixup_hcalls;
fc87e185 78
3c902d44
BB
79static uint32_t debug_inst_opcode;
80
c821c2bd
AG
81/* XXX We have a race condition where we actually have a level triggered
82 * interrupt, but the infrastructure can't expose that yet, so the guest
83 * takes but ignores it, goes to sleep and never gets notified that there's
84 * still an interrupt pending.
c6a94ba5 85 *
c821c2bd
AG
86 * As a quick workaround, let's just wake up again 20 ms after we injected
87 * an interrupt. That way we can assure that we're always reinjecting
88 * interrupts in case the guest swallowed them.
c6a94ba5
AG
89 */
90static QEMUTimer *idle_timer;
91
d5a68146 92static void kvm_kick_cpu(void *opaque)
c6a94ba5 93{
d5a68146 94 PowerPCCPU *cpu = opaque;
d5a68146 95
c08d7424 96 qemu_cpu_kick(CPU(cpu));
c6a94ba5
AG
97}
98
5ba4576b
AF
99static int kvm_ppc_register_host_cpu_type(void);
100
b16565b3 101int kvm_arch_init(MachineState *ms, KVMState *s)
d76d1650 102{
fc87e185 103 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
fc87e185 104 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
90dc8812 105 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
90dc8812 106 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
e97c3636 107 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
354ac20a 108 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
0f5cb298 109 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
da95324e 110 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
9bb62a07 111 cap_spapr_vfio = false;
d67d40ea 112 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
f1af19d7 113 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
3b961124 114 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
31f2cb8f 115 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
9b00ea49
DG
116 /* Note: we don't set cap_papr here, because this capability is
117 * only activated after this by kvmppc_set_papr() */
e68cb8b4 118 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
87a91de6 119 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
fc87e185
AG
120
121 if (!cap_interrupt_level) {
122 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
123 "VM to stall at times!\n");
124 }
125
5ba4576b
AF
126 kvm_ppc_register_host_cpu_type();
127
d76d1650
AJ
128 return 0;
129}
130
1bc22652 131static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
d76d1650 132{
1bc22652
AF
133 CPUPPCState *cenv = &cpu->env;
134 CPUState *cs = CPU(cpu);
861bbc80 135 struct kvm_sregs sregs;
5666ca4a
SW
136 int ret;
137
138 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
64e07be5
AG
139 /* What we're really trying to say is "if we're on BookE, we use
140 the native PVR for now". This is the only sane way to check
141 it though, so we potentially confuse users that they can run
142 BookE guests on BookS. Let's hope nobody dares enough :) */
5666ca4a
SW
143 return 0;
144 } else {
90dc8812 145 if (!cap_segstate) {
64e07be5
AG
146 fprintf(stderr, "kvm error: missing PVR setting capability\n");
147 return -ENOSYS;
5666ca4a 148 }
5666ca4a
SW
149 }
150
1bc22652 151 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
5666ca4a
SW
152 if (ret) {
153 return ret;
154 }
861bbc80
AG
155
156 sregs.pvr = cenv->spr[SPR_PVR];
1bc22652 157 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
5666ca4a
SW
158}
159
93dd5e85 160/* Set up a shared TLB array with KVM */
1bc22652 161static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
93dd5e85 162{
1bc22652
AF
163 CPUPPCState *env = &cpu->env;
164 CPUState *cs = CPU(cpu);
93dd5e85
SW
165 struct kvm_book3e_206_tlb_params params = {};
166 struct kvm_config_tlb cfg = {};
93dd5e85
SW
167 unsigned int entries = 0;
168 int ret, i;
169
170 if (!kvm_enabled() ||
a60f24b5 171 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
93dd5e85
SW
172 return 0;
173 }
174
175 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
176
177 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
178 params.tlb_sizes[i] = booke206_tlb_size(env, i);
179 params.tlb_ways[i] = booke206_tlb_ways(env, i);
180 entries += params.tlb_sizes[i];
181 }
182
183 assert(entries == env->nb_tlb);
184 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
185
186 env->tlb_dirty = true;
187
188 cfg.array = (uintptr_t)env->tlb.tlbm;
189 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
190 cfg.params = (uintptr_t)&params;
191 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
192
48add816 193 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
93dd5e85
SW
194 if (ret < 0) {
195 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
196 __func__, strerror(-ret));
197 return ret;
198 }
199
200 env->kvm_sw_tlb = true;
201 return 0;
202}
203
4656e1f0
BH
204
205#if defined(TARGET_PPC64)
a60f24b5 206static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
4656e1f0
BH
207 struct kvm_ppc_smmu_info *info)
208{
a60f24b5
AF
209 CPUPPCState *env = &cpu->env;
210 CPUState *cs = CPU(cpu);
211
4656e1f0
BH
212 memset(info, 0, sizeof(*info));
213
214 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
215 * need to "guess" what the supported page sizes are.
216 *
217 * For that to work we make a few assumptions:
218 *
219 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
220 * KVM which only supports 4K and 16M pages, but supports them
221 * regardless of the backing store characteritics. We also don't
222 * support 1T segments.
223 *
224 * This is safe as if HV KVM ever supports that capability or PR
225 * KVM grows supports for more page/segment sizes, those versions
226 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
227 * will not hit this fallback
228 *
229 * - Else we are running HV KVM. This means we only support page
230 * sizes that fit in the backing store. Additionally we only
231 * advertize 64K pages if the processor is ARCH 2.06 and we assume
232 * P7 encodings for the SLB and hash table. Here too, we assume
233 * support for any newer processor will mean a kernel that
234 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
235 * this fallback.
236 */
a60f24b5 237 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
4656e1f0
BH
238 /* No flags */
239 info->flags = 0;
240 info->slb_size = 64;
241
242 /* Standard 4k base page size segment */
243 info->sps[0].page_shift = 12;
244 info->sps[0].slb_enc = 0;
245 info->sps[0].enc[0].page_shift = 12;
246 info->sps[0].enc[0].pte_enc = 0;
247
248 /* Standard 16M large page size segment */
249 info->sps[1].page_shift = 24;
250 info->sps[1].slb_enc = SLB_VSID_L;
251 info->sps[1].enc[0].page_shift = 24;
252 info->sps[1].enc[0].pte_enc = 0;
253 } else {
254 int i = 0;
255
256 /* HV KVM has backing store size restrictions */
257 info->flags = KVM_PPC_PAGE_SIZES_REAL;
258
259 if (env->mmu_model & POWERPC_MMU_1TSEG) {
260 info->flags |= KVM_PPC_1T_SEGMENTS;
261 }
262
aa4bb587
BH
263 if (env->mmu_model == POWERPC_MMU_2_06 ||
264 env->mmu_model == POWERPC_MMU_2_07) {
4656e1f0
BH
265 info->slb_size = 32;
266 } else {
267 info->slb_size = 64;
268 }
269
270 /* Standard 4k base page size segment */
271 info->sps[i].page_shift = 12;
272 info->sps[i].slb_enc = 0;
273 info->sps[i].enc[0].page_shift = 12;
274 info->sps[i].enc[0].pte_enc = 0;
275 i++;
276
aa4bb587
BH
277 /* 64K on MMU 2.06 and later */
278 if (env->mmu_model == POWERPC_MMU_2_06 ||
279 env->mmu_model == POWERPC_MMU_2_07) {
4656e1f0
BH
280 info->sps[i].page_shift = 16;
281 info->sps[i].slb_enc = 0x110;
282 info->sps[i].enc[0].page_shift = 16;
283 info->sps[i].enc[0].pte_enc = 1;
284 i++;
285 }
286
287 /* Standard 16M large page size segment */
288 info->sps[i].page_shift = 24;
289 info->sps[i].slb_enc = SLB_VSID_L;
290 info->sps[i].enc[0].page_shift = 24;
291 info->sps[i].enc[0].pte_enc = 0;
292 }
293}
294
a60f24b5 295static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
4656e1f0 296{
a60f24b5 297 CPUState *cs = CPU(cpu);
4656e1f0
BH
298 int ret;
299
a60f24b5
AF
300 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
301 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
4656e1f0
BH
302 if (ret == 0) {
303 return;
304 }
305 }
306
a60f24b5 307 kvm_get_fallback_smmu_info(cpu, info);
4656e1f0
BH
308}
309
2d103aae 310static long gethugepagesize(const char *mem_path)
4656e1f0
BH
311{
312 struct statfs fs;
313 int ret;
314
4656e1f0
BH
315 do {
316 ret = statfs(mem_path, &fs);
317 } while (ret != 0 && errno == EINTR);
318
319 if (ret != 0) {
320 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
321 strerror(errno));
322 exit(1);
323 }
324
325#define HUGETLBFS_MAGIC 0x958458f6
326
327 if (fs.f_type != HUGETLBFS_MAGIC) {
328 /* Explicit mempath, but it's ordinary pages */
329 return getpagesize();
330 }
331
332 /* It's hugepage, return the huge page size */
333 return fs.f_bsize;
334}
335
2d103aae
MR
336static int find_max_supported_pagesize(Object *obj, void *opaque)
337{
338 char *mem_path;
339 long *hpsize_min = opaque;
340
341 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
342 mem_path = object_property_get_str(obj, "mem-path", NULL);
343 if (mem_path) {
344 long hpsize = gethugepagesize(mem_path);
345 if (hpsize < *hpsize_min) {
346 *hpsize_min = hpsize;
347 }
348 } else {
349 *hpsize_min = getpagesize();
350 }
351 }
352
353 return 0;
354}
355
356static long getrampagesize(void)
357{
358 long hpsize = LONG_MAX;
359 Object *memdev_root;
360
361 if (mem_path) {
362 return gethugepagesize(mem_path);
363 }
364
365 /* it's possible we have memory-backend objects with
366 * hugepage-backed RAM. these may get mapped into system
367 * address space via -numa parameters or memory hotplug
368 * hooks. we want to take these into account, but we
369 * also want to make sure these supported hugepage
370 * sizes are applicable across the entire range of memory
371 * we may boot from, so we take the min across all
372 * backends, and assume normal pages in cases where a
373 * backend isn't backed by hugepages.
374 */
375 memdev_root = object_resolve_path("/objects", NULL);
376 if (!memdev_root) {
377 return getpagesize();
378 }
379
380 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
381
382 return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
383}
384
4656e1f0
BH
385static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
386{
387 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
388 return true;
389 }
390
391 return (1ul << shift) <= rampgsize;
392}
393
a60f24b5 394static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
4656e1f0
BH
395{
396 static struct kvm_ppc_smmu_info smmu_info;
397 static bool has_smmu_info;
a60f24b5 398 CPUPPCState *env = &cpu->env;
4656e1f0
BH
399 long rampagesize;
400 int iq, ik, jq, jk;
401
402 /* We only handle page sizes for 64-bit server guests for now */
403 if (!(env->mmu_model & POWERPC_MMU_64)) {
404 return;
405 }
406
407 /* Collect MMU info from kernel if not already */
408 if (!has_smmu_info) {
a60f24b5 409 kvm_get_smmu_info(cpu, &smmu_info);
4656e1f0
BH
410 has_smmu_info = true;
411 }
412
413 rampagesize = getrampagesize();
414
415 /* Convert to QEMU form */
416 memset(&env->sps, 0, sizeof(env->sps));
417
90da0d5a
BH
418 /* If we have HV KVM, we need to forbid CI large pages if our
419 * host page size is smaller than 64K.
420 */
421 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
422 env->ci_large_pages = getpagesize() >= 0x10000;
423 }
424
08215d8f
AG
425 /*
426 * XXX This loop should be an entry wide AND of the capabilities that
427 * the selected CPU has with the capabilities that KVM supports.
428 */
4656e1f0
BH
429 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
430 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
431 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
432
433 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
434 ksps->page_shift)) {
435 continue;
436 }
437 qsps->page_shift = ksps->page_shift;
438 qsps->slb_enc = ksps->slb_enc;
439 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
440 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
441 ksps->enc[jk].page_shift)) {
442 continue;
443 }
444 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
445 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
446 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
447 break;
448 }
449 }
450 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
451 break;
452 }
453 }
454 env->slb_nr = smmu_info.slb_size;
08215d8f 455 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
4656e1f0
BH
456 env->mmu_model &= ~POWERPC_MMU_1TSEG;
457 }
458}
459#else /* defined (TARGET_PPC64) */
460
a60f24b5 461static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
4656e1f0
BH
462{
463}
464
465#endif /* !defined (TARGET_PPC64) */
466
b164e48e
EH
467unsigned long kvm_arch_vcpu_id(CPUState *cpu)
468{
0f20ba62 469 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
b164e48e
EH
470}
471
88365d17
BB
472/* e500 supports 2 h/w breakpoint and 2 watchpoint.
473 * book3s supports only 1 watchpoint, so array size
474 * of 4 is sufficient for now.
475 */
476#define MAX_HW_BKPTS 4
477
478static struct HWBreakpoint {
479 target_ulong addr;
480 int type;
481} hw_debug_points[MAX_HW_BKPTS];
482
483static CPUWatchpoint hw_watchpoint;
484
485/* Default there is no breakpoint and watchpoint supported */
486static int max_hw_breakpoint;
487static int max_hw_watchpoint;
488static int nb_hw_breakpoint;
489static int nb_hw_watchpoint;
490
491static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
492{
493 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
494 max_hw_breakpoint = 2;
495 max_hw_watchpoint = 2;
496 }
497
498 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
499 fprintf(stderr, "Error initializing h/w breakpoints\n");
500 return;
501 }
502}
503
20d695a9 504int kvm_arch_init_vcpu(CPUState *cs)
5666ca4a 505{
20d695a9
AF
506 PowerPCCPU *cpu = POWERPC_CPU(cs);
507 CPUPPCState *cenv = &cpu->env;
5666ca4a
SW
508 int ret;
509
4656e1f0 510 /* Gather server mmu info from KVM and update the CPU state */
a60f24b5 511 kvm_fixup_page_sizes(cpu);
4656e1f0
BH
512
513 /* Synchronize sregs with kvm */
1bc22652 514 ret = kvm_arch_sync_sregs(cpu);
5666ca4a 515 if (ret) {
388e47c7
TH
516 if (ret == -EINVAL) {
517 error_report("Register sync failed... If you're using kvm-hv.ko,"
518 " only \"-cpu host\" is possible");
519 }
5666ca4a
SW
520 return ret;
521 }
861bbc80 522
bc72ad67 523 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
c821c2bd 524
93dd5e85
SW
525 /* Some targets support access to KVM's guest TLB. */
526 switch (cenv->mmu_model) {
527 case POWERPC_MMU_BOOKE206:
1bc22652 528 ret = kvm_booke206_tlb_init(cpu);
93dd5e85
SW
529 break;
530 default:
531 break;
532 }
533
3c902d44 534 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
88365d17 535 kvmppc_hw_debug_points_init(cenv);
3c902d44 536
861bbc80 537 return ret;
d76d1650
AJ
538}
539
1bc22652 540static void kvm_sw_tlb_put(PowerPCCPU *cpu)
93dd5e85 541{
1bc22652
AF
542 CPUPPCState *env = &cpu->env;
543 CPUState *cs = CPU(cpu);
93dd5e85
SW
544 struct kvm_dirty_tlb dirty_tlb;
545 unsigned char *bitmap;
546 int ret;
547
548 if (!env->kvm_sw_tlb) {
549 return;
550 }
551
552 bitmap = g_malloc((env->nb_tlb + 7) / 8);
553 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
554
555 dirty_tlb.bitmap = (uintptr_t)bitmap;
556 dirty_tlb.num_dirty = env->nb_tlb;
557
1bc22652 558 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
93dd5e85
SW
559 if (ret) {
560 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
561 __func__, strerror(-ret));
562 }
563
564 g_free(bitmap);
565}
566
d67d40ea
DG
567static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
568{
569 PowerPCCPU *cpu = POWERPC_CPU(cs);
570 CPUPPCState *env = &cpu->env;
571 union {
572 uint32_t u32;
573 uint64_t u64;
574 } val;
575 struct kvm_one_reg reg = {
576 .id = id,
577 .addr = (uintptr_t) &val,
578 };
579 int ret;
580
581 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
582 if (ret != 0) {
b36f100e 583 trace_kvm_failed_spr_get(spr, strerror(errno));
d67d40ea
DG
584 } else {
585 switch (id & KVM_REG_SIZE_MASK) {
586 case KVM_REG_SIZE_U32:
587 env->spr[spr] = val.u32;
588 break;
589
590 case KVM_REG_SIZE_U64:
591 env->spr[spr] = val.u64;
592 break;
593
594 default:
595 /* Don't handle this size yet */
596 abort();
597 }
598 }
599}
600
601static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
602{
603 PowerPCCPU *cpu = POWERPC_CPU(cs);
604 CPUPPCState *env = &cpu->env;
605 union {
606 uint32_t u32;
607 uint64_t u64;
608 } val;
609 struct kvm_one_reg reg = {
610 .id = id,
611 .addr = (uintptr_t) &val,
612 };
613 int ret;
614
615 switch (id & KVM_REG_SIZE_MASK) {
616 case KVM_REG_SIZE_U32:
617 val.u32 = env->spr[spr];
618 break;
619
620 case KVM_REG_SIZE_U64:
621 val.u64 = env->spr[spr];
622 break;
623
624 default:
625 /* Don't handle this size yet */
626 abort();
627 }
628
629 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
630 if (ret != 0) {
b36f100e 631 trace_kvm_failed_spr_set(spr, strerror(errno));
d67d40ea
DG
632 }
633}
634
70b79849
DG
635static int kvm_put_fp(CPUState *cs)
636{
637 PowerPCCPU *cpu = POWERPC_CPU(cs);
638 CPUPPCState *env = &cpu->env;
639 struct kvm_one_reg reg;
640 int i;
641 int ret;
642
643 if (env->insns_flags & PPC_FLOAT) {
644 uint64_t fpscr = env->fpscr;
645 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
646
647 reg.id = KVM_REG_PPC_FPSCR;
648 reg.addr = (uintptr_t)&fpscr;
649 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
650 if (ret < 0) {
da56ff91 651 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
70b79849
DG
652 return ret;
653 }
654
655 for (i = 0; i < 32; i++) {
656 uint64_t vsr[2];
657
3a4b791b 658#ifdef HOST_WORDS_BIGENDIAN
70b79849
DG
659 vsr[0] = float64_val(env->fpr[i]);
660 vsr[1] = env->vsr[i];
3a4b791b
GK
661#else
662 vsr[0] = env->vsr[i];
663 vsr[1] = float64_val(env->fpr[i]);
664#endif
70b79849
DG
665 reg.addr = (uintptr_t) &vsr;
666 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
667
668 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
669 if (ret < 0) {
da56ff91 670 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
70b79849
DG
671 i, strerror(errno));
672 return ret;
673 }
674 }
675 }
676
677 if (env->insns_flags & PPC_ALTIVEC) {
678 reg.id = KVM_REG_PPC_VSCR;
679 reg.addr = (uintptr_t)&env->vscr;
680 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
681 if (ret < 0) {
da56ff91 682 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
70b79849
DG
683 return ret;
684 }
685
686 for (i = 0; i < 32; i++) {
687 reg.id = KVM_REG_PPC_VR(i);
688 reg.addr = (uintptr_t)&env->avr[i];
689 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
690 if (ret < 0) {
da56ff91 691 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
70b79849
DG
692 return ret;
693 }
694 }
695 }
696
697 return 0;
698}
699
700static int kvm_get_fp(CPUState *cs)
701{
702 PowerPCCPU *cpu = POWERPC_CPU(cs);
703 CPUPPCState *env = &cpu->env;
704 struct kvm_one_reg reg;
705 int i;
706 int ret;
707
708 if (env->insns_flags & PPC_FLOAT) {
709 uint64_t fpscr;
710 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
711
712 reg.id = KVM_REG_PPC_FPSCR;
713 reg.addr = (uintptr_t)&fpscr;
714 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
715 if (ret < 0) {
da56ff91 716 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
70b79849
DG
717 return ret;
718 } else {
719 env->fpscr = fpscr;
720 }
721
722 for (i = 0; i < 32; i++) {
723 uint64_t vsr[2];
724
725 reg.addr = (uintptr_t) &vsr;
726 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
727
728 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
729 if (ret < 0) {
da56ff91 730 DPRINTF("Unable to get %s%d from KVM: %s\n",
70b79849
DG
731 vsx ? "VSR" : "FPR", i, strerror(errno));
732 return ret;
733 } else {
3a4b791b 734#ifdef HOST_WORDS_BIGENDIAN
70b79849
DG
735 env->fpr[i] = vsr[0];
736 if (vsx) {
737 env->vsr[i] = vsr[1];
738 }
3a4b791b
GK
739#else
740 env->fpr[i] = vsr[1];
741 if (vsx) {
742 env->vsr[i] = vsr[0];
743 }
744#endif
70b79849
DG
745 }
746 }
747 }
748
749 if (env->insns_flags & PPC_ALTIVEC) {
750 reg.id = KVM_REG_PPC_VSCR;
751 reg.addr = (uintptr_t)&env->vscr;
752 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
753 if (ret < 0) {
da56ff91 754 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
70b79849
DG
755 return ret;
756 }
757
758 for (i = 0; i < 32; i++) {
759 reg.id = KVM_REG_PPC_VR(i);
760 reg.addr = (uintptr_t)&env->avr[i];
761 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
762 if (ret < 0) {
da56ff91 763 DPRINTF("Unable to get VR%d from KVM: %s\n",
70b79849
DG
764 i, strerror(errno));
765 return ret;
766 }
767 }
768 }
769
770 return 0;
771}
772
9b00ea49
DG
773#if defined(TARGET_PPC64)
774static int kvm_get_vpa(CPUState *cs)
775{
776 PowerPCCPU *cpu = POWERPC_CPU(cs);
777 CPUPPCState *env = &cpu->env;
778 struct kvm_one_reg reg;
779 int ret;
780
781 reg.id = KVM_REG_PPC_VPA_ADDR;
782 reg.addr = (uintptr_t)&env->vpa_addr;
783 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
784 if (ret < 0) {
da56ff91 785 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
9b00ea49
DG
786 return ret;
787 }
788
789 assert((uintptr_t)&env->slb_shadow_size
790 == ((uintptr_t)&env->slb_shadow_addr + 8));
791 reg.id = KVM_REG_PPC_VPA_SLB;
792 reg.addr = (uintptr_t)&env->slb_shadow_addr;
793 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
794 if (ret < 0) {
da56ff91 795 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
9b00ea49
DG
796 strerror(errno));
797 return ret;
798 }
799
800 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
801 reg.id = KVM_REG_PPC_VPA_DTL;
802 reg.addr = (uintptr_t)&env->dtl_addr;
803 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
804 if (ret < 0) {
da56ff91 805 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
9b00ea49
DG
806 strerror(errno));
807 return ret;
808 }
809
810 return 0;
811}
812
813static int kvm_put_vpa(CPUState *cs)
814{
815 PowerPCCPU *cpu = POWERPC_CPU(cs);
816 CPUPPCState *env = &cpu->env;
817 struct kvm_one_reg reg;
818 int ret;
819
820 /* SLB shadow or DTL can't be registered unless a master VPA is
821 * registered. That means when restoring state, if a VPA *is*
822 * registered, we need to set that up first. If not, we need to
823 * deregister the others before deregistering the master VPA */
824 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
825
826 if (env->vpa_addr) {
827 reg.id = KVM_REG_PPC_VPA_ADDR;
828 reg.addr = (uintptr_t)&env->vpa_addr;
829 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
830 if (ret < 0) {
da56ff91 831 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
9b00ea49
DG
832 return ret;
833 }
834 }
835
836 assert((uintptr_t)&env->slb_shadow_size
837 == ((uintptr_t)&env->slb_shadow_addr + 8));
838 reg.id = KVM_REG_PPC_VPA_SLB;
839 reg.addr = (uintptr_t)&env->slb_shadow_addr;
840 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
841 if (ret < 0) {
da56ff91 842 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
9b00ea49
DG
843 return ret;
844 }
845
846 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
847 reg.id = KVM_REG_PPC_VPA_DTL;
848 reg.addr = (uintptr_t)&env->dtl_addr;
849 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
850 if (ret < 0) {
da56ff91 851 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
9b00ea49
DG
852 strerror(errno));
853 return ret;
854 }
855
856 if (!env->vpa_addr) {
857 reg.id = KVM_REG_PPC_VPA_ADDR;
858 reg.addr = (uintptr_t)&env->vpa_addr;
859 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
860 if (ret < 0) {
da56ff91 861 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
9b00ea49
DG
862 return ret;
863 }
864 }
865
866 return 0;
867}
868#endif /* TARGET_PPC64 */
869
e5c0d3ce 870int kvmppc_put_books_sregs(PowerPCCPU *cpu)
a7a00a72
DG
871{
872 CPUPPCState *env = &cpu->env;
873 struct kvm_sregs sregs;
874 int i;
875
876 sregs.pvr = env->spr[SPR_PVR];
877
878 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
879
880 /* Sync SLB */
881#ifdef TARGET_PPC64
882 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
883 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
884 if (env->slb[i].esid & SLB_ESID_V) {
885 sregs.u.s.ppc64.slb[i].slbe |= i;
886 }
887 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
888 }
889#endif
890
891 /* Sync SRs */
892 for (i = 0; i < 16; i++) {
893 sregs.u.s.ppc32.sr[i] = env->sr[i];
894 }
895
896 /* Sync BATs */
897 for (i = 0; i < 8; i++) {
898 /* Beware. We have to swap upper and lower bits here */
899 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
900 | env->DBAT[1][i];
901 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
902 | env->IBAT[1][i];
903 }
904
905 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
906}
907
20d695a9 908int kvm_arch_put_registers(CPUState *cs, int level)
d76d1650 909{
20d695a9
AF
910 PowerPCCPU *cpu = POWERPC_CPU(cs);
911 CPUPPCState *env = &cpu->env;
d76d1650
AJ
912 struct kvm_regs regs;
913 int ret;
914 int i;
915
1bc22652
AF
916 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
917 if (ret < 0) {
d76d1650 918 return ret;
1bc22652 919 }
d76d1650
AJ
920
921 regs.ctr = env->ctr;
922 regs.lr = env->lr;
da91a00f 923 regs.xer = cpu_read_xer(env);
d76d1650
AJ
924 regs.msr = env->msr;
925 regs.pc = env->nip;
926
927 regs.srr0 = env->spr[SPR_SRR0];
928 regs.srr1 = env->spr[SPR_SRR1];
929
930 regs.sprg0 = env->spr[SPR_SPRG0];
931 regs.sprg1 = env->spr[SPR_SPRG1];
932 regs.sprg2 = env->spr[SPR_SPRG2];
933 regs.sprg3 = env->spr[SPR_SPRG3];
934 regs.sprg4 = env->spr[SPR_SPRG4];
935 regs.sprg5 = env->spr[SPR_SPRG5];
936 regs.sprg6 = env->spr[SPR_SPRG6];
937 regs.sprg7 = env->spr[SPR_SPRG7];
938
90dc8812
SW
939 regs.pid = env->spr[SPR_BOOKE_PID];
940
d76d1650
AJ
941 for (i = 0;i < 32; i++)
942 regs.gpr[i] = env->gpr[i];
943
4bddaf55
AK
944 regs.cr = 0;
945 for (i = 0; i < 8; i++) {
946 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
947 }
948
1bc22652 949 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
d76d1650
AJ
950 if (ret < 0)
951 return ret;
952
70b79849
DG
953 kvm_put_fp(cs);
954
93dd5e85 955 if (env->tlb_dirty) {
1bc22652 956 kvm_sw_tlb_put(cpu);
93dd5e85
SW
957 env->tlb_dirty = false;
958 }
959
f1af19d7 960 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
a7a00a72
DG
961 ret = kvmppc_put_books_sregs(cpu);
962 if (ret < 0) {
f1af19d7
DG
963 return ret;
964 }
965 }
966
967 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
d67d40ea
DG
968 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
969 }
f1af19d7 970
d67d40ea
DG
971 if (cap_one_reg) {
972 int i;
973
974 /* We deliberately ignore errors here, for kernels which have
975 * the ONE_REG calls, but don't support the specific
976 * registers, there's a reasonable chance things will still
977 * work, at least until we try to migrate. */
978 for (i = 0; i < 1024; i++) {
979 uint64_t id = env->spr_cb[i].one_reg_id;
980
981 if (id != 0) {
982 kvm_put_one_spr(cs, id, i);
983 }
f1af19d7 984 }
9b00ea49
DG
985
986#ifdef TARGET_PPC64
80b3f79b
AK
987 if (msr_ts) {
988 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
989 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
990 }
991 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
992 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
993 }
994 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
995 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
996 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
997 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
998 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
999 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1000 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1001 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1002 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1003 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1004 }
1005
9b00ea49
DG
1006 if (cap_papr) {
1007 if (kvm_put_vpa(cs) < 0) {
da56ff91 1008 DPRINTF("Warning: Unable to set VPA information to KVM\n");
9b00ea49
DG
1009 }
1010 }
98a8b524
AK
1011
1012 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
9b00ea49 1013#endif /* TARGET_PPC64 */
f1af19d7
DG
1014 }
1015
d76d1650
AJ
1016 return ret;
1017}
1018
c371c2e3
BB
1019static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1020{
1021 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1022}
1023
a7a00a72
DG
1024static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1025{
1026 CPUPPCState *env = &cpu->env;
1027 struct kvm_sregs sregs;
1028 int ret;
1029
1030 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1031 if (ret < 0) {
1032 return ret;
1033 }
1034
1035 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1036 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1037 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1038 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1039 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1040 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1041 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1042 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1043 env->spr[SPR_DECR] = sregs.u.e.dec;
1044 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1045 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1046 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1047 }
1048
1049 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1050 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1051 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1052 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1053 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1054 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1055 }
1056
1057 if (sregs.u.e.features & KVM_SREGS_E_64) {
1058 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1059 }
1060
1061 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1062 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1063 }
1064
1065 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1066 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1067 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1068 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1069 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1070 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1071 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1072 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1073 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1074 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1075 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1076 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1077 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1078 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1079 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1080 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1081 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1082 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1083 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1084 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1085 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1086 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1087 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1088 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1089 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1090 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1091 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1092 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1093 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1094 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1095 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1096 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1097 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1098
1099 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1100 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1101 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1102 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1103 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1104 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1105 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1106 }
1107
1108 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1109 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1110 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1111 }
1112
1113 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1114 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1115 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1116 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1117 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1118 }
1119 }
1120
1121 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1122 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1123 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1124 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1125 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1126 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1127 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1128 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1129 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1130 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1131 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1132 }
1133
1134 if (sregs.u.e.features & KVM_SREGS_EXP) {
1135 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1136 }
1137
1138 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1139 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1140 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1141 }
1142
1143 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1144 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1145 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1146 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1147
1148 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1149 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1150 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1151 }
1152 }
1153
1154 return 0;
1155}
1156
1157static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1158{
1159 CPUPPCState *env = &cpu->env;
1160 struct kvm_sregs sregs;
1161 int ret;
1162 int i;
1163
1164 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1165 if (ret < 0) {
1166 return ret;
1167 }
1168
1169 if (!env->external_htab) {
1170 ppc_store_sdr1(env, sregs.u.s.sdr1);
1171 }
1172
1173 /* Sync SLB */
1174#ifdef TARGET_PPC64
1175 /*
1176 * The packed SLB array we get from KVM_GET_SREGS only contains
1177 * information about valid entries. So we flush our internal copy
1178 * to get rid of stale ones, then put all valid SLB entries back
1179 * in.
1180 */
1181 memset(env->slb, 0, sizeof(env->slb));
1182 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1183 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1184 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1185 /*
1186 * Only restore valid entries
1187 */
1188 if (rb & SLB_ESID_V) {
1189 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1190 }
1191 }
1192#endif
1193
1194 /* Sync SRs */
1195 for (i = 0; i < 16; i++) {
1196 env->sr[i] = sregs.u.s.ppc32.sr[i];
1197 }
1198
1199 /* Sync BATs */
1200 for (i = 0; i < 8; i++) {
1201 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1202 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1203 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1204 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1205 }
1206
1207 return 0;
1208}
1209
20d695a9 1210int kvm_arch_get_registers(CPUState *cs)
d76d1650 1211{
20d695a9
AF
1212 PowerPCCPU *cpu = POWERPC_CPU(cs);
1213 CPUPPCState *env = &cpu->env;
d76d1650 1214 struct kvm_regs regs;
90dc8812 1215 uint32_t cr;
138b38b6 1216 int i, ret;
d76d1650 1217
1bc22652 1218 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
d76d1650
AJ
1219 if (ret < 0)
1220 return ret;
1221
90dc8812
SW
1222 cr = regs.cr;
1223 for (i = 7; i >= 0; i--) {
1224 env->crf[i] = cr & 15;
1225 cr >>= 4;
1226 }
ba5e5090 1227
d76d1650
AJ
1228 env->ctr = regs.ctr;
1229 env->lr = regs.lr;
da91a00f 1230 cpu_write_xer(env, regs.xer);
d76d1650
AJ
1231 env->msr = regs.msr;
1232 env->nip = regs.pc;
1233
1234 env->spr[SPR_SRR0] = regs.srr0;
1235 env->spr[SPR_SRR1] = regs.srr1;
1236
1237 env->spr[SPR_SPRG0] = regs.sprg0;
1238 env->spr[SPR_SPRG1] = regs.sprg1;
1239 env->spr[SPR_SPRG2] = regs.sprg2;
1240 env->spr[SPR_SPRG3] = regs.sprg3;
1241 env->spr[SPR_SPRG4] = regs.sprg4;
1242 env->spr[SPR_SPRG5] = regs.sprg5;
1243 env->spr[SPR_SPRG6] = regs.sprg6;
1244 env->spr[SPR_SPRG7] = regs.sprg7;
1245
90dc8812
SW
1246 env->spr[SPR_BOOKE_PID] = regs.pid;
1247
d76d1650
AJ
1248 for (i = 0;i < 32; i++)
1249 env->gpr[i] = regs.gpr[i];
1250
70b79849
DG
1251 kvm_get_fp(cs);
1252
90dc8812 1253 if (cap_booke_sregs) {
a7a00a72 1254 ret = kvmppc_get_booke_sregs(cpu);
90dc8812
SW
1255 if (ret < 0) {
1256 return ret;
1257 }
fafc0b6a 1258 }
90dc8812 1259
90dc8812 1260 if (cap_segstate) {
a7a00a72 1261 ret = kvmppc_get_books_sregs(cpu);
90dc8812
SW
1262 if (ret < 0) {
1263 return ret;
1264 }
fafc0b6a 1265 }
ba5e5090 1266
d67d40ea
DG
1267 if (cap_hior) {
1268 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1269 }
1270
1271 if (cap_one_reg) {
1272 int i;
1273
1274 /* We deliberately ignore errors here, for kernels which have
1275 * the ONE_REG calls, but don't support the specific
1276 * registers, there's a reasonable chance things will still
1277 * work, at least until we try to migrate. */
1278 for (i = 0; i < 1024; i++) {
1279 uint64_t id = env->spr_cb[i].one_reg_id;
1280
1281 if (id != 0) {
1282 kvm_get_one_spr(cs, id, i);
1283 }
1284 }
9b00ea49
DG
1285
1286#ifdef TARGET_PPC64
80b3f79b
AK
1287 if (msr_ts) {
1288 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1289 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1290 }
1291 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1292 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1293 }
1294 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1295 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1296 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1297 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1298 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1299 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1300 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1301 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1302 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1303 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1304 }
1305
9b00ea49
DG
1306 if (cap_papr) {
1307 if (kvm_get_vpa(cs) < 0) {
da56ff91 1308 DPRINTF("Warning: Unable to get VPA information from KVM\n");
9b00ea49
DG
1309 }
1310 }
98a8b524
AK
1311
1312 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
9b00ea49 1313#endif
d67d40ea
DG
1314 }
1315
d76d1650
AJ
1316 return 0;
1317}
1318
1bc22652 1319int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
fc87e185
AG
1320{
1321 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1322
1323 if (irq != PPC_INTERRUPT_EXT) {
1324 return 0;
1325 }
1326
1327 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1328 return 0;
1329 }
1330
1bc22652 1331 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
fc87e185
AG
1332
1333 return 0;
1334}
1335
16415335
AG
1336#if defined(TARGET_PPCEMB)
1337#define PPC_INPUT_INT PPC40x_INPUT_INT
1338#elif defined(TARGET_PPC64)
1339#define PPC_INPUT_INT PPC970_INPUT_INT
1340#else
1341#define PPC_INPUT_INT PPC6xx_INPUT_INT
1342#endif
1343
20d695a9 1344void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
d76d1650 1345{
20d695a9
AF
1346 PowerPCCPU *cpu = POWERPC_CPU(cs);
1347 CPUPPCState *env = &cpu->env;
d76d1650
AJ
1348 int r;
1349 unsigned irq;
1350
4b8523ee
JK
1351 qemu_mutex_lock_iothread();
1352
5cbdb3a3 1353 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
d76d1650 1354 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
fc87e185
AG
1355 if (!cap_interrupt_level &&
1356 run->ready_for_interrupt_injection &&
259186a7 1357 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
16415335 1358 (env->irq_input_state & (1<<PPC_INPUT_INT)))
d76d1650
AJ
1359 {
1360 /* For now KVM disregards the 'irq' argument. However, in the
1361 * future KVM could cache it in-kernel to avoid a heavyweight exit
1362 * when reading the UIC.
1363 */
fc87e185 1364 irq = KVM_INTERRUPT_SET;
d76d1650 1365
da56ff91 1366 DPRINTF("injected interrupt %d\n", irq);
1bc22652 1367 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
55e5c285
AF
1368 if (r < 0) {
1369 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1370 }
c821c2bd
AG
1371
1372 /* Always wake up soon in case the interrupt was level based */
bc72ad67 1373 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 1374 (NANOSECONDS_PER_SECOND / 50));
d76d1650
AJ
1375 }
1376
1377 /* We don't know if there are more interrupts pending after this. However,
1378 * the guest will return to userspace in the course of handling this one
1379 * anyways, so we will get a chance to deliver the rest. */
4b8523ee
JK
1380
1381 qemu_mutex_unlock_iothread();
d76d1650
AJ
1382}
1383
4c663752 1384MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
d76d1650 1385{
4c663752 1386 return MEMTXATTRS_UNSPECIFIED;
d76d1650
AJ
1387}
1388
20d695a9 1389int kvm_arch_process_async_events(CPUState *cs)
0af691d7 1390{
259186a7 1391 return cs->halted;
0af691d7
MT
1392}
1393
259186a7 1394static int kvmppc_handle_halt(PowerPCCPU *cpu)
d76d1650 1395{
259186a7
AF
1396 CPUState *cs = CPU(cpu);
1397 CPUPPCState *env = &cpu->env;
1398
1399 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1400 cs->halted = 1;
27103424 1401 cs->exception_index = EXCP_HLT;
d76d1650
AJ
1402 }
1403
bb4ea393 1404 return 0;
d76d1650
AJ
1405}
1406
1407/* map dcr access to existing qemu dcr emulation */
1328c2bf 1408static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
d76d1650
AJ
1409{
1410 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1411 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1412
bb4ea393 1413 return 0;
d76d1650
AJ
1414}
1415
1328c2bf 1416static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
d76d1650
AJ
1417{
1418 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1419 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1420
bb4ea393 1421 return 0;
d76d1650
AJ
1422}
1423
8a0548f9
BB
1424int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1425{
1426 /* Mixed endian case is not handled */
1427 uint32_t sc = debug_inst_opcode;
1428
1429 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1430 sizeof(sc), 0) ||
1431 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1432 return -EINVAL;
1433 }
1434
1435 return 0;
1436}
1437
1438int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1439{
1440 uint32_t sc;
1441
1442 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1443 sc != debug_inst_opcode ||
1444 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1445 sizeof(sc), 1)) {
1446 return -EINVAL;
1447 }
1448
1449 return 0;
1450}
1451
88365d17
BB
1452static int find_hw_breakpoint(target_ulong addr, int type)
1453{
1454 int n;
1455
1456 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1457 <= ARRAY_SIZE(hw_debug_points));
1458
1459 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1460 if (hw_debug_points[n].addr == addr &&
1461 hw_debug_points[n].type == type) {
1462 return n;
1463 }
1464 }
1465
1466 return -1;
1467}
1468
1469static int find_hw_watchpoint(target_ulong addr, int *flag)
1470{
1471 int n;
1472
1473 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1474 if (n >= 0) {
1475 *flag = BP_MEM_ACCESS;
1476 return n;
1477 }
1478
1479 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1480 if (n >= 0) {
1481 *flag = BP_MEM_WRITE;
1482 return n;
1483 }
1484
1485 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1486 if (n >= 0) {
1487 *flag = BP_MEM_READ;
1488 return n;
1489 }
1490
1491 return -1;
1492}
1493
1494int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1495 target_ulong len, int type)
1496{
1497 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1498 return -ENOBUFS;
1499 }
1500
1501 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1502 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1503
1504 switch (type) {
1505 case GDB_BREAKPOINT_HW:
1506 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1507 return -ENOBUFS;
1508 }
1509
1510 if (find_hw_breakpoint(addr, type) >= 0) {
1511 return -EEXIST;
1512 }
1513
1514 nb_hw_breakpoint++;
1515 break;
1516
1517 case GDB_WATCHPOINT_WRITE:
1518 case GDB_WATCHPOINT_READ:
1519 case GDB_WATCHPOINT_ACCESS:
1520 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1521 return -ENOBUFS;
1522 }
1523
1524 if (find_hw_breakpoint(addr, type) >= 0) {
1525 return -EEXIST;
1526 }
1527
1528 nb_hw_watchpoint++;
1529 break;
1530
1531 default:
1532 return -ENOSYS;
1533 }
1534
1535 return 0;
1536}
1537
1538int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1539 target_ulong len, int type)
1540{
1541 int n;
1542
1543 n = find_hw_breakpoint(addr, type);
1544 if (n < 0) {
1545 return -ENOENT;
1546 }
1547
1548 switch (type) {
1549 case GDB_BREAKPOINT_HW:
1550 nb_hw_breakpoint--;
1551 break;
1552
1553 case GDB_WATCHPOINT_WRITE:
1554 case GDB_WATCHPOINT_READ:
1555 case GDB_WATCHPOINT_ACCESS:
1556 nb_hw_watchpoint--;
1557 break;
1558
1559 default:
1560 return -ENOSYS;
1561 }
1562 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1563
1564 return 0;
1565}
1566
1567void kvm_arch_remove_all_hw_breakpoints(void)
1568{
1569 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1570}
1571
8a0548f9
BB
1572void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1573{
88365d17
BB
1574 int n;
1575
8a0548f9
BB
1576 /* Software Breakpoint updates */
1577 if (kvm_sw_breakpoints_active(cs)) {
1578 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1579 }
88365d17
BB
1580
1581 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1582 <= ARRAY_SIZE(hw_debug_points));
1583 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1584
1585 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1586 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1587 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1588 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1589 switch (hw_debug_points[n].type) {
1590 case GDB_BREAKPOINT_HW:
1591 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1592 break;
1593 case GDB_WATCHPOINT_WRITE:
1594 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1595 break;
1596 case GDB_WATCHPOINT_READ:
1597 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1598 break;
1599 case GDB_WATCHPOINT_ACCESS:
1600 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1601 KVMPPC_DEBUG_WATCH_READ;
1602 break;
1603 default:
1604 cpu_abort(cs, "Unsupported breakpoint type\n");
1605 }
1606 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1607 }
1608 }
8a0548f9
BB
1609}
1610
1611static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1612{
1613 CPUState *cs = CPU(cpu);
1614 CPUPPCState *env = &cpu->env;
1615 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1616 int handle = 0;
88365d17
BB
1617 int n;
1618 int flag = 0;
8a0548f9 1619
88365d17
BB
1620 if (cs->singlestep_enabled) {
1621 handle = 1;
1622 } else if (arch_info->status) {
1623 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1624 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1625 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1626 if (n >= 0) {
1627 handle = 1;
1628 }
1629 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1630 KVMPPC_DEBUG_WATCH_WRITE)) {
1631 n = find_hw_watchpoint(arch_info->address, &flag);
1632 if (n >= 0) {
1633 handle = 1;
1634 cs->watchpoint_hit = &hw_watchpoint;
1635 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1636 hw_watchpoint.flags = flag;
1637 }
1638 }
1639 }
1640 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
8a0548f9
BB
1641 handle = 1;
1642 } else {
1643 /* QEMU is not able to handle debug exception, so inject
1644 * program exception to guest;
1645 * Yes program exception NOT debug exception !!
88365d17
BB
1646 * When QEMU is using debug resources then debug exception must
1647 * be always set. To achieve this we set MSR_DE and also set
1648 * MSRP_DEP so guest cannot change MSR_DE.
1649 * When emulating debug resource for guest we want guest
1650 * to control MSR_DE (enable/disable debug interrupt on need).
1651 * Supporting both configurations are NOT possible.
1652 * So the result is that we cannot share debug resources
1653 * between QEMU and Guest on BOOKE architecture.
1654 * In the current design QEMU gets the priority over guest,
1655 * this means that if QEMU is using debug resources then guest
1656 * cannot use them;
8a0548f9
BB
1657 * For software breakpoint QEMU uses a privileged instruction;
1658 * So there cannot be any reason that we are here for guest
1659 * set debug exception, only possibility is guest executed a
1660 * privileged / illegal instruction and that's why we are
1661 * injecting a program interrupt.
1662 */
1663
1664 cpu_synchronize_state(cs);
1665 /* env->nip is PC, so increment this by 4 to use
1666 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1667 */
1668 env->nip += 4;
1669 cs->exception_index = POWERPC_EXCP_PROGRAM;
1670 env->error_code = POWERPC_EXCP_INVAL;
1671 ppc_cpu_do_interrupt(cs);
1672 }
1673
1674 return handle;
1675}
1676
20d695a9 1677int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
d76d1650 1678{
20d695a9
AF
1679 PowerPCCPU *cpu = POWERPC_CPU(cs);
1680 CPUPPCState *env = &cpu->env;
bb4ea393 1681 int ret;
d76d1650 1682
4b8523ee
JK
1683 qemu_mutex_lock_iothread();
1684
d76d1650
AJ
1685 switch (run->exit_reason) {
1686 case KVM_EXIT_DCR:
1687 if (run->dcr.is_write) {
da56ff91 1688 DPRINTF("handle dcr write\n");
d76d1650
AJ
1689 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1690 } else {
da56ff91 1691 DPRINTF("handle dcr read\n");
d76d1650
AJ
1692 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1693 }
1694 break;
1695 case KVM_EXIT_HLT:
da56ff91 1696 DPRINTF("handle halt\n");
259186a7 1697 ret = kvmppc_handle_halt(cpu);
d76d1650 1698 break;
c6304a4a 1699#if defined(TARGET_PPC64)
f61b4bed 1700 case KVM_EXIT_PAPR_HCALL:
da56ff91 1701 DPRINTF("handle PAPR hypercall\n");
20d695a9 1702 run->papr_hcall.ret = spapr_hypercall(cpu,
aa100fa4 1703 run->papr_hcall.nr,
f61b4bed 1704 run->papr_hcall.args);
78e8fde2 1705 ret = 0;
f61b4bed
AG
1706 break;
1707#endif
5b95b8b9 1708 case KVM_EXIT_EPR:
da56ff91 1709 DPRINTF("handle epr\n");
933b19ea 1710 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
5b95b8b9
AG
1711 ret = 0;
1712 break;
31f2cb8f 1713 case KVM_EXIT_WATCHDOG:
da56ff91 1714 DPRINTF("handle watchdog expiry\n");
31f2cb8f
BB
1715 watchdog_perform_action();
1716 ret = 0;
1717 break;
1718
8a0548f9
BB
1719 case KVM_EXIT_DEBUG:
1720 DPRINTF("handle debug exception\n");
1721 if (kvm_handle_debug(cpu, run)) {
1722 ret = EXCP_DEBUG;
1723 break;
1724 }
1725 /* re-enter, this exception was guest-internal */
1726 ret = 0;
1727 break;
1728
73aaec4a
JK
1729 default:
1730 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1731 ret = -1;
1732 break;
d76d1650
AJ
1733 }
1734
4b8523ee 1735 qemu_mutex_unlock_iothread();
d76d1650
AJ
1736 return ret;
1737}
1738
31f2cb8f
BB
1739int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1740{
1741 CPUState *cs = CPU(cpu);
1742 uint32_t bits = tsr_bits;
1743 struct kvm_one_reg reg = {
1744 .id = KVM_REG_PPC_OR_TSR,
1745 .addr = (uintptr_t) &bits,
1746 };
1747
1748 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1749}
1750
1751int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1752{
1753
1754 CPUState *cs = CPU(cpu);
1755 uint32_t bits = tsr_bits;
1756 struct kvm_one_reg reg = {
1757 .id = KVM_REG_PPC_CLEAR_TSR,
1758 .addr = (uintptr_t) &bits,
1759 };
1760
1761 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1762}
1763
1764int kvmppc_set_tcr(PowerPCCPU *cpu)
1765{
1766 CPUState *cs = CPU(cpu);
1767 CPUPPCState *env = &cpu->env;
1768 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1769
1770 struct kvm_one_reg reg = {
1771 .id = KVM_REG_PPC_TCR,
1772 .addr = (uintptr_t) &tcr,
1773 };
1774
1775 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1776}
1777
1778int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1779{
1780 CPUState *cs = CPU(cpu);
31f2cb8f
BB
1781 int ret;
1782
1783 if (!kvm_enabled()) {
1784 return -1;
1785 }
1786
1787 if (!cap_ppc_watchdog) {
1788 printf("warning: KVM does not support watchdog");
1789 return -1;
1790 }
1791
48add816 1792 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
31f2cb8f
BB
1793 if (ret < 0) {
1794 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1795 __func__, strerror(-ret));
1796 return ret;
1797 }
1798
1799 return ret;
1800}
1801
dc333cd6
AG
1802static int read_cpuinfo(const char *field, char *value, int len)
1803{
1804 FILE *f;
1805 int ret = -1;
1806 int field_len = strlen(field);
1807 char line[512];
1808
1809 f = fopen("/proc/cpuinfo", "r");
1810 if (!f) {
1811 return -1;
1812 }
1813
1814 do {
ef951443 1815 if (!fgets(line, sizeof(line), f)) {
dc333cd6
AG
1816 break;
1817 }
1818 if (!strncmp(line, field, field_len)) {
ae215068 1819 pstrcpy(value, len, line);
dc333cd6
AG
1820 ret = 0;
1821 break;
1822 }
1823 } while(*line);
1824
1825 fclose(f);
1826
1827 return ret;
1828}
1829
1830uint32_t kvmppc_get_tbfreq(void)
1831{
1832 char line[512];
1833 char *ns;
73bcb24d 1834 uint32_t retval = NANOSECONDS_PER_SECOND;
dc333cd6
AG
1835
1836 if (read_cpuinfo("timebase", line, sizeof(line))) {
1837 return retval;
1838 }
1839
1840 if (!(ns = strchr(line, ':'))) {
1841 return retval;
1842 }
1843
1844 ns++;
1845
f9b8e7f6 1846 return atoi(ns);
dc333cd6 1847}
4513d923 1848
ef951443
ND
1849bool kvmppc_get_host_serial(char **value)
1850{
1851 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1852 NULL);
1853}
1854
1855bool kvmppc_get_host_model(char **value)
1856{
1857 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1858}
1859
eadaada1
AG
1860/* Try to find a device tree node for a CPU with clock-frequency property */
1861static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1862{
1863 struct dirent *dirp;
1864 DIR *dp;
1865
1866 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1867 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1868 return -1;
1869 }
1870
1871 buf[0] = '\0';
1872 while ((dirp = readdir(dp)) != NULL) {
1873 FILE *f;
1874 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1875 dirp->d_name);
1876 f = fopen(buf, "r");
1877 if (f) {
1878 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1879 fclose(f);
1880 break;
1881 }
1882 buf[0] = '\0';
1883 }
1884 closedir(dp);
1885 if (buf[0] == '\0') {
1886 printf("Unknown host!\n");
1887 return -1;
1888 }
1889
1890 return 0;
1891}
1892
7d94a30b 1893static uint64_t kvmppc_read_int_dt(const char *filename)
eadaada1 1894{
9bc884b7
DG
1895 union {
1896 uint32_t v32;
1897 uint64_t v64;
1898 } u;
eadaada1
AG
1899 FILE *f;
1900 int len;
1901
7d94a30b 1902 f = fopen(filename, "rb");
eadaada1
AG
1903 if (!f) {
1904 return -1;
1905 }
1906
9bc884b7 1907 len = fread(&u, 1, sizeof(u), f);
eadaada1
AG
1908 fclose(f);
1909 switch (len) {
9bc884b7
DG
1910 case 4:
1911 /* property is a 32-bit quantity */
1912 return be32_to_cpu(u.v32);
1913 case 8:
1914 return be64_to_cpu(u.v64);
eadaada1
AG
1915 }
1916
1917 return 0;
1918}
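/*
 * Worked example for the 4-byte case above (illustrative, not part of
 * this file): device tree integers are stored big-endian, so a
 * "clock-frequency" property holding the bytes 1e 84 80 00 must be
 * assembled MSB-first, giving 0x1e848000 == 512000000 Hz; be32_to_cpu()
 * performs exactly that conversion on a little-endian host.
 */
#include <stdint.h>
#include <assert.h>

static uint32_t example_be32_to_host(const uint8_t b[4])
{
    return ((uint32_t)b[0] << 24) | ((uint32_t)b[1] << 16) |
           ((uint32_t)b[2] << 8)  |  (uint32_t)b[3];
}

static void example_check_clock_frequency(void)
{
    const uint8_t prop[4] = { 0x1e, 0x84, 0x80, 0x00 };
    assert(example_be32_to_host(prop) == 512000000);
}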
1919
7d94a30b
SB
1920/* Read a CPU node property from the host device tree that's a single
1921 * integer (32-bit or 64-bit).  Returns (uint64_t)-1 if the node or
1922 * property can't be found or opened, and 0 if the property isn't a
1923 * 4- or 8-byte value. */
1924static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1925{
1926 char buf[PATH_MAX], *tmp;
1927 uint64_t val;
1928
1929 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1930 return -1;
1931 }
1932
1933 tmp = g_strdup_printf("%s/%s", buf, propname);
1934 val = kvmppc_read_int_dt(tmp);
1935 g_free(tmp);
1936
1937 return val;
1938}
1939
9bc884b7
DG
1940uint64_t kvmppc_get_clockfreq(void)
1941{
1942 return kvmppc_read_int_cpu_dt("clock-frequency");
1943}
1944
6659394f
DG
1945uint32_t kvmppc_get_vmx(void)
1946{
1947 return kvmppc_read_int_cpu_dt("ibm,vmx");
1948}
1949
1950uint32_t kvmppc_get_dfp(void)
1951{
1952 return kvmppc_read_int_cpu_dt("ibm,dfp");
1953}
1954
1a61a9ae
SY
1955static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1956 {
1957 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1958 CPUState *cs = CPU(cpu);
1959
6fd33a75 1960 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1a61a9ae
SY
1961 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1962 return 0;
1963 }
1964
1965 return 1;
1966}
1967
1968int kvmppc_get_hasidle(CPUPPCState *env)
1969{
1970 struct kvm_ppc_pvinfo pvinfo;
1971
1972 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1973 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1974 return 1;
1975 }
1976
1977 return 0;
1978}
1979
1328c2bf 1980int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
45024f09
AG
1981{
1982    uint32_t *hc = (uint32_t *)buf;
45024f09
AG
1983 struct kvm_ppc_pvinfo pvinfo;
1984
1a61a9ae 1985 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
45024f09 1986 memcpy(buf, pvinfo.hcall, buf_len);
45024f09
AG
1987 return 0;
1988 }
45024f09
AG
1989
1990 /*
d13fc32e 1991 * Fallback to always fail hypercalls regardless of endianness:
45024f09 1992 *
d13fc32e 1993 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
45024f09 1994 * li r3, -1
d13fc32e
AG
1995 * b .+8 (becomes nop in wrong endian)
1996 * bswap32(li r3, -1)
45024f09
AG
1997 */
1998
d13fc32e
AG
1999 hc[0] = cpu_to_be32(0x08000048);
2000 hc[1] = cpu_to_be32(0x3860ffff);
2001 hc[2] = cpu_to_be32(0x48000008);
2002 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
45024f09
AG
2003
2004 return 0;
2005}
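/*
 * Illustrative check of the endian trick above (not part of this file):
 * byte-swapping the first word of the fallback sequence yields exactly
 * the third word, so a guest of either endianness decodes a valid
 * "load -1 into r3 and fall through" sequence.
 */
#include <stdint.h>
#include <assert.h>

static inline uint32_t example_bswap32(uint32_t x)
{
    return ((x & 0xff000000u) >> 24) | ((x & 0x00ff0000u) >> 8) |
           ((x & 0x0000ff00u) << 8)  | ((x & 0x000000ffu) << 24);
}

static void example_check_fallback_hcall(void)
{
    assert(example_bswap32(0x08000048) == 0x48000008); /* tdi <-> b .+8   */
    assert(example_bswap32(0x3860ffff) == 0xffff6038); /* li r3,-1 swapped */
}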
2006
026bfd89
DG
2007static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2008{
2009 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2010}
2011
2012void kvmppc_enable_logical_ci_hcalls(void)
2013{
2014 /*
2015     * FIXME: it would be nice if we could detect the case where
2016     * we're using a device that requires the in-kernel
2017     * implementation of these hcalls, but the kernel lacks them,
2018     * and produce a warning.
2019 */
2020 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2021 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2022}
2023
ef9971dd
AK
2024void kvmppc_enable_set_mode_hcall(void)
2025{
2026 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2027}
2028
1bc22652 2029void kvmppc_set_papr(PowerPCCPU *cpu)
f61b4bed 2030{
1bc22652 2031 CPUState *cs = CPU(cpu);
f61b4bed
AG
2032 int ret;
2033
48add816 2034 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
f61b4bed 2035 if (ret) {
072ed5f2
TH
2036 error_report("This vCPU type or KVM version does not support PAPR");
2037 exit(1);
94135e81 2038 }
9b00ea49
DG
2039
2040 /* Update the capability flag so we sync the right information
2041 * with kvm */
2042 cap_papr = 1;
f61b4bed
AG
2043}
2044
6db5bb0f
AK
2045int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2046{
2047 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2048}
2049
5b95b8b9
AG
2050void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2051{
5b95b8b9 2052 CPUState *cs = CPU(cpu);
5b95b8b9
AG
2053 int ret;
2054
48add816 2055 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
5b95b8b9 2056 if (ret && mpic_proxy) {
072ed5f2
TH
2057 error_report("This KVM version does not support EPR");
2058 exit(1);
5b95b8b9
AG
2059 }
2060}
2061
e97c3636
DG
2062int kvmppc_smt_threads(void)
2063{
2064 return cap_ppc_smt ? cap_ppc_smt : 1;
2065}
2066
7f763a5d 2067#ifdef TARGET_PPC64
658fa66b 2068off_t kvmppc_alloc_rma(void **rma)
354ac20a 2069{
354ac20a
DG
2070 off_t size;
2071 int fd;
2072 struct kvm_allocate_rma ret;
354ac20a
DG
2073
2074 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2075 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2076 * not necessary on this hardware
2077 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2078 *
2079 * FIXME: We should allow the user to force contiguous RMA
2080 * allocation in the cap_ppc_rma==1 case.
2081 */
2082 if (cap_ppc_rma < 2) {
2083 return 0;
2084 }
2085
2086 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2087 if (fd < 0) {
2088 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2089 strerror(errno));
2090 return -1;
2091 }
2092
2093 size = MIN(ret.rma_size, 256ul << 20);
2094
658fa66b
AK
2095 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2096 if (*rma == MAP_FAILED) {
354ac20a
DG
2097 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2098 return -1;
2099    }
2100
354ac20a
DG
2101 return size;
2102}
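/*
 * Minimal caller sketch (illustrative; the real consumer is the sPAPR
 * machine setup).  A return value > 0 is the size of a host-kernel
 * allocated, physically contiguous RMA that should back the start of
 * guest RAM; 0 means no special RMA is required on this host.
 */
static void example_setup_rma(void)
{
    void *rma = NULL;
    off_t rma_size = kvmppc_alloc_rma(&rma);

    if (rma_size < 0) {
        /* KVM_ALLOCATE_RMA or mmap() failed */
        return;
    }
    if (rma_size == 0) {
        /* ordinary anonymous memory is fine */
        return;
    }
    /* ... back guest addresses [0, rma_size) with the mapping in 'rma' ... */
}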
2103
7f763a5d
DG
2104uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2105{
f36951c1
DG
2106 struct kvm_ppc_smmu_info info;
2107 long rampagesize, best_page_shift;
2108 int i;
2109
7f763a5d
DG
2110 if (cap_ppc_rma >= 2) {
2111 return current_size;
2112 }
f36951c1
DG
2113
2114 /* Find the largest hardware supported page size that's less than
2115 * or equal to the (logical) backing page size of guest RAM */
182735ef 2116 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
f36951c1
DG
2117 rampagesize = getrampagesize();
2118 best_page_shift = 0;
2119
2120 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2121 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2122
2123 if (!sps->page_shift) {
2124 continue;
2125 }
2126
2127 if ((sps->page_shift > best_page_shift)
2128 && ((1UL << sps->page_shift) <= rampagesize)) {
2129 best_page_shift = sps->page_shift;
2130 }
2131 }
2132
7f763a5d 2133 return MIN(current_size,
f36951c1 2134 1ULL << (best_page_shift + hash_shift - 7));
7f763a5d
DG
2135}
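/*
 * Worked example for the clamp above (illustrative numbers): with 4 KiB
 * backing pages (best_page_shift == 12) and a 16 MiB hash table
 * (hash_shift == 24) the RMA is limited to
 *     1ULL << (12 + 24 - 7) == 1ULL << 29 == 512 MiB,
 * so a larger requested current_size would be cut down to 512 MiB.
 */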
2136#endif
2137
da95324e
AK
2138bool kvmppc_spapr_use_multitce(void)
2139{
2140 return cap_spapr_multitce;
2141}
2142
9bb62a07 2143void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
6a81dd17 2144 bool need_vfio)
0f5cb298
DG
2145{
2146 struct kvm_create_spapr_tce args = {
2147 .liobn = liobn,
2148 .window_size = window_size,
2149 };
2150 long len;
2151 int fd;
2152 void *table;
2153
b5aec396
DG
2154 /* Must set fd to -1 so we don't try to munmap when called for
2155 * destroying the table, which the upper layers -will- do
2156 */
2157 *pfd = -1;
6a81dd17 2158 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
0f5cb298
DG
2159 return NULL;
2160 }
2161
2162 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2163 if (fd < 0) {
b5aec396
DG
2164 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2165 liobn);
0f5cb298
DG
2166 return NULL;
2167 }
2168
a83000f5 2169 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
0f5cb298
DG
2170 /* FIXME: round this up to page size */
2171
74b41e56 2172 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
0f5cb298 2173 if (table == MAP_FAILED) {
b5aec396
DG
2174 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2175 liobn);
0f5cb298
DG
2176 close(fd);
2177 return NULL;
2178 }
2179
2180 *pfd = fd;
2181 return table;
2182}
2183
523e7b8a 2184int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
0f5cb298
DG
2185{
2186 long len;
2187
2188 if (fd < 0) {
2189 return -1;
2190 }
2191
523e7b8a 2192 len = nb_table * sizeof(uint64_t);
0f5cb298
DG
2193 if ((munmap(table, len) < 0) ||
2194 (close(fd) < 0)) {
b5aec396
DG
2195 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2196 strerror(errno));
0f5cb298
DG
2197 /* Leak the table */
2198 }
2199
2200 return 0;
2201}
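/*
 * Illustrative sizing and lifecycle for the two helpers above, assuming
 * the usual 4 KiB TCE page size: a 256 MiB DMA window holds
 * 256 MiB / 4 KiB == 65536 TCEs, so the mmap()ed table is
 * 65536 * sizeof(uint64_t) == 512 KiB.  The fd returned through *pfd and
 * the entry count must be handed back when the window is torn down.
 */
static void example_tce_lifecycle(uint32_t liobn)
{
    uint32_t window_size = 256 * 1024 * 1024;
    uint32_t nb_table = window_size / 4096;
    int fd;
    void *table = kvmppc_create_spapr_tce(liobn, window_size, &fd, false);

    if (!table) {
        return;             /* fall back to a userspace TCE table */
    }
    /* ... TCEs are updated in place through 'table' ... */
    kvmppc_remove_spapr_tce(table, fd, nb_table);
}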
2202
7f763a5d
DG
2203int kvmppc_reset_htab(int shift_hint)
2204{
2205 uint32_t shift = shift_hint;
2206
ace9a2cb
DG
2207 if (!kvm_enabled()) {
2208 /* Full emulation, tell caller to allocate htab itself */
2209 return 0;
2210 }
2211 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
7f763a5d
DG
2212 int ret;
2213 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
ace9a2cb
DG
2214 if (ret == -ENOTTY) {
2215 /* At least some versions of PR KVM advertise the
2216 * capability, but don't implement the ioctl(). Oops.
2217 * Return 0 so that we allocate the htab in qemu, as is
2218 * correct for PR. */
2219 return 0;
2220 } else if (ret < 0) {
7f763a5d
DG
2221 return ret;
2222 }
2223 return shift;
2224 }
2225
ace9a2cb
DG
2226    /* We have a kernel that predates the htab reset calls.  For PR
2227     * KVM, we need to allocate the htab ourselves; an HV KVM of this
2228     * era will already have allocated a fixed 16MB hash table.
2229     * Kernels of this era advertise the GET_PVINFO capability only
2230     * on PR, so we use that as a hack to tell the two cases
2231     * apart. */
2232 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2233 /* PR - tell caller to allocate htab */
2234 return 0;
2235 } else {
2236 /* HV - assume 16MB kernel allocated htab */
2237 return 24;
2238 }
7f763a5d
DG
2239}
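/*
 * Sketch of how a caller interprets the return value (illustrative; the
 * real consumer is the sPAPR hash-table setup code):
 */
static void example_use_reset_htab(int shift_hint)
{
    int shift = kvmppc_reset_htab(shift_hint);

    if (shift < 0) {
        /* genuine error from the ioctl */
    } else if (shift == 0) {
        /* no kernel HTAB: allocate the hash table in QEMU itself */
    } else {
        /* kernel owns an HTAB of 1UL << shift bytes; don't allocate one */
    }
}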
2240
a1e98583
DG
2241static inline uint32_t mfpvr(void)
2242{
2243 uint32_t pvr;
2244
2245 asm ("mfpvr %0"
2246 : "=r"(pvr));
2247 return pvr;
2248}
2249
a7342588
DG
2250static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2251{
2252 if (on) {
2253 *word |= flags;
2254 } else {
2255 *word &= ~flags;
2256 }
2257}
2258
2985b86b 2259static void kvmppc_host_cpu_initfn(Object *obj)
a1e98583 2260{
2985b86b
AF
2261 assert(kvm_enabled());
2262}
2263
2264static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2265{
4c315c27 2266 DeviceClass *dc = DEVICE_CLASS(oc);
2985b86b 2267 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
a7342588
DG
2268 uint32_t vmx = kvmppc_get_vmx();
2269 uint32_t dfp = kvmppc_get_dfp();
0cbad81f
DG
2270 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2271 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
a1e98583 2272
cfe34f44 2273 /* Now fix up the class with information we can query from the host */
3bc9ccc0 2274 pcc->pvr = mfpvr();
a7342588 2275
70bca53f
AG
2276 if (vmx != -1) {
2277 /* Only override when we know what the host supports */
cfe34f44
AF
2278 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2279 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
70bca53f
AG
2280 }
2281 if (dfp != -1) {
2282 /* Only override when we know what the host supports */
cfe34f44 2283 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
70bca53f 2284 }
0cbad81f
DG
2285
2286 if (dcache_size != -1) {
2287 pcc->l1_dcache_size = dcache_size;
2288 }
2289
2290 if (icache_size != -1) {
2291 pcc->l1_icache_size = icache_size;
2292 }
4c315c27
MA
2293
2294 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2295 dc->cannot_destroy_with_object_finalize_yet = true;
a1e98583
DG
2296}
2297
3b961124
SY
2298bool kvmppc_has_cap_epr(void)
2299{
2300 return cap_epr;
2301}
2302
7c43bca0
AK
2303bool kvmppc_has_cap_htab_fd(void)
2304{
2305 return cap_htab_fd;
2306}
2307
87a91de6
AG
2308bool kvmppc_has_cap_fixup_hcalls(void)
2309{
2310 return cap_fixup_hcalls;
2311}
2312
5b79b1ca
AK
2313static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2314{
2315 ObjectClass *oc = OBJECT_CLASS(pcc);
2316
2317 while (oc && !object_class_is_abstract(oc)) {
2318 oc = object_class_get_parent(oc);
2319 }
2320 assert(oc);
2321
2322 return POWERPC_CPU_CLASS(oc);
2323}
2324
5ba4576b
AF
2325static int kvm_ppc_register_host_cpu_type(void)
2326{
2327 TypeInfo type_info = {
2328 .name = TYPE_HOST_POWERPC_CPU,
2329 .instance_init = kvmppc_host_cpu_initfn,
2330 .class_init = kvmppc_host_cpu_class_init,
2331 };
2332 uint32_t host_pvr = mfpvr();
2333 PowerPCCPUClass *pvr_pcc;
5b79b1ca 2334 DeviceClass *dc;
5ba4576b
AF
2335
2336 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
3bc9ccc0
AK
2337 if (pvr_pcc == NULL) {
2338 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2339 }
5ba4576b
AF
2340 if (pvr_pcc == NULL) {
2341 return -1;
2342 }
2343 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2344 type_register(&type_info);
5b79b1ca
AK
2345
2346    /* Also register a generic class for the whole CPU family */
2347 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2348 dc = DEVICE_CLASS(pvr_pcc);
2349 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2350 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2351 type_register(&type_info);
2352
5ba4576b
AF
2353 return 0;
2354}
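/*
 * Illustrative outcome (the exact names depend on the host CPU): on a
 * POWER8 host the "host" type ends up with the exact-PVR POWER8 class as
 * its parent, and the second registration adds a family-level alias whose
 * name is the family DeviceClass desc plus "-" TYPE_POWERPC_CPU, i.e.
 * something like "POWER8-powerpc64-cpu".
 */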
2355
feaa64c4
DG
2356int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2357{
2358 struct kvm_rtas_token_args args = {
2359 .token = token,
2360 };
2361
2362 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2363 return -ENOENT;
2364 }
2365
2366 strncpy(args.name, function, sizeof(args.name));
2367
2368 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2369}
12b1143b 2370
e68cb8b4
AK
2371int kvmppc_get_htab_fd(bool write)
2372{
2373 struct kvm_get_htab_fd s = {
2374 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2375 .start_index = 0,
2376 };
2377
2378 if (!cap_htab_fd) {
2379 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2380 return -1;
2381 }
2382
2383 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2384}
2385
2386int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2387{
bc72ad67 2388 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
e68cb8b4
AK
2389 uint8_t buf[bufsize];
2390 ssize_t rc;
2391
2392 do {
2393 rc = read(fd, buf, bufsize);
2394 if (rc < 0) {
2395 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2396 strerror(errno));
2397 return rc;
2398 } else if (rc) {
e094c4c1
CLG
2399 uint8_t *buffer = buf;
2400 ssize_t n = rc;
2401 while (n) {
2402 struct kvm_get_htab_header *head =
2403 (struct kvm_get_htab_header *) buffer;
2404 size_t chunksize = sizeof(*head) +
2405 HASH_PTE_SIZE_64 * head->n_valid;
2406
2407 qemu_put_be32(f, head->index);
2408 qemu_put_be16(f, head->n_valid);
2409 qemu_put_be16(f, head->n_invalid);
2410 qemu_put_buffer(f, (void *)(head + 1),
2411 HASH_PTE_SIZE_64 * head->n_valid);
2412
2413 buffer += chunksize;
2414 n -= chunksize;
2415 }
e68cb8b4
AK
2416 }
2417 } while ((rc != 0)
2418 && ((max_ns < 0)
bc72ad67 2419 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
e68cb8b4
AK
2420
2421 return (rc == 0) ? 1 : 0;
2422}
2423
2424int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2425 uint16_t n_valid, uint16_t n_invalid)
2426{
2427 struct kvm_get_htab_header *buf;
2428 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2429 ssize_t rc;
2430
2431 buf = alloca(chunksize);
e68cb8b4
AK
2432 buf->index = index;
2433 buf->n_valid = n_valid;
2434 buf->n_invalid = n_invalid;
2435
2436 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2437
2438 rc = write(fd, buf, chunksize);
2439 if (rc < 0) {
2440 fprintf(stderr, "Error writing KVM hash table: %s\n",
2441 strerror(errno));
2442 return rc;
2443 }
2444 if (rc != chunksize) {
2445 /* We should never get a short write on a single chunk */
2446 fprintf(stderr, "Short write, restoring KVM hash table\n");
2447 return -1;
2448 }
2449 return 0;
2450}
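/*
 * Shape of one migration-stream record as written by kvmppc_save_htab()
 * and consumed by kvmppc_load_htab_chunk() above:
 *
 *   be32  index       starting HPTE index of this chunk
 *   be16  n_valid     number of valid HPTEs that follow
 *   be16  n_invalid   number of HPTEs to invalidate at the destination
 *   n_valid * HASH_PTE_SIZE_64 (16) bytes of raw HPTE data
 *
 * So a chunk with n_valid == 8 occupies 8 + 8 * 16 == 136 bytes on the
 * wire (illustrative arithmetic; field sizes follow the code above).
 */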
2451
20d695a9 2452bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
4513d923
GN
2453{
2454 return true;
2455}
a1b87fe0 2456
20d695a9 2457int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
a1b87fe0
JK
2458{
2459 return 1;
2460}
2461
2462int kvm_arch_on_sigbus(int code, void *addr)
2463{
2464 return 1;
2465}
82169660
SW
2466
2467void kvm_arch_init_irq_routing(KVMState *s)
2468{
2469}
c65f9a07 2470
7c43bca0
AK
2471struct kvm_get_htab_buf {
2472 struct kvm_get_htab_header header;
2473 /*
2474 * We require one extra byte for read
2475 */
2476 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2477};
2478
2479uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2480{
2481 int htab_fd;
2482 struct kvm_get_htab_fd ghf;
2483 struct kvm_get_htab_buf *hpte_buf;
2484
2485 ghf.flags = 0;
2486 ghf.start_index = pte_index;
2487 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2488 if (htab_fd < 0) {
2489 goto error_out;
2490 }
2491
2492 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2493 /*
2494 * Read the hpte group
2495 */
2496 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2497 goto out_close;
2498 }
2499
2500 close(htab_fd);
2501 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2502
2503out_close:
2504 g_free(hpte_buf);
2505 close(htab_fd);
2506error_out:
2507 return 0;
2508}
2509
2510void kvmppc_hash64_free_pteg(uint64_t token)
2511{
2512 struct kvm_get_htab_buf *htab_buf;
2513
2514 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2515 hpte);
2516 g_free(htab_buf);
2517 return;
2518}
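/*
 * Minimal usage sketch for the PTEG read/free pair above (illustrative;
 * the real caller is the hash-MMU translation path).  The token is the
 * address of the raw HPTE words cast to uint64_t and must be returned to
 * kvmppc_hash64_free_pteg() once the group has been scanned.
 */
static void example_scan_pteg(PowerPCCPU *cpu, target_ulong pte_index)
{
    uint64_t token = kvmppc_hash64_read_pteg(cpu, pte_index);
    target_ulong *hptes;
    int i;

    if (!token) {
        return;                          /* fd or read failure */
    }
    hptes = (target_ulong *)(uintptr_t)token;
    for (i = 0; i < HPTES_PER_GROUP; i++) {
        target_ulong pte0 = hptes[i * 2];
        target_ulong pte1 = hptes[i * 2 + 1];

        /* ... compare pte0/pte1 against the faulting address ... */
        (void)pte0;
        (void)pte1;
    }
    kvmppc_hash64_free_pteg(token);
}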
c1385933
AK
2519
2520void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2521 target_ulong pte0, target_ulong pte1)
2522{
2523 int htab_fd;
2524 struct kvm_get_htab_fd ghf;
2525 struct kvm_get_htab_buf hpte_buf;
2526
2527 ghf.flags = 0;
2528 ghf.start_index = 0; /* Ignored */
2529 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2530 if (htab_fd < 0) {
2531 goto error_out;
2532 }
2533
2534 hpte_buf.header.n_valid = 1;
2535 hpte_buf.header.n_invalid = 0;
2536 hpte_buf.header.index = pte_index;
2537 hpte_buf.hpte[0] = pte0;
2538 hpte_buf.hpte[1] = pte1;
2539 /*
2540 * Write the hpte entry.
2541 * CAUTION: write() has the warn_unused_result attribute. Hence we
2542 * need to check the return value, even though we do nothing.
2543 */
2544 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2545 goto out_close;
2546 }
2547
2548out_close:
2549 close(htab_fd);
2550 return;
2551
2552error_out:
2553 return;
2554}
9e03a040
FB
2555
2556int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
dc9f06ca 2557 uint64_t address, uint32_t data, PCIDevice *dev)
9e03a040
FB
2558{
2559 return 0;
2560}
1850b6b7
EA
2561
2562int kvm_arch_msi_data_to_gsi(uint32_t data)
2563{
2564 return data & 0xffff;
2565}
4d9392be
TH
2566
2567int kvmppc_enable_hwrng(void)
2568{
2569 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2570 return -1;
2571 }
2572
2573 return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2574}