/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <dirent.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
#include "cpu.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/ppc/ppc.h"
#include "sysemu/watchdog.h"
#include "trace.h"
#include "exec/gdbstub.h"
#include "exec/memattrs.h"
#include "sysemu/hostmem.h"
#include "qemu/cutils.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;

static uint32_t debug_inst_opcode;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}

static int kvm_ppc_register_host_cpu_type(void);

int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = false;
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}

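/* Make sure the vcpu reports the PVR of the CPU model QEMU selected;
 * on BookE the host PVR is left as-is. */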
static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users into thinking they
           can run BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}


#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06 ||
            env->mmu_model == POWERPC_MMU_2_07) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 and later */
        if (env->mmu_model == POWERPC_MMU_2_06 ||
            env->mmu_model == POWERPC_MMU_2_07) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}

static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}

static long gethugepagesize(const char *mem_path)
{
    struct statfs fs;
    int ret;

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC 0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}

/*
 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
 * may or may not name the same files / on the same filesystem now as
 * when we actually open and map them. Iterate over the file
 * descriptors instead, and use qemu_fd_getpagesize().
 */
static int find_max_supported_pagesize(Object *obj, void *opaque)
{
    char *mem_path;
    long *hpsize_min = opaque;

    if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
        mem_path = object_property_get_str(obj, "mem-path", NULL);
        if (mem_path) {
            long hpsize = gethugepagesize(mem_path);
            if (hpsize < *hpsize_min) {
                *hpsize_min = hpsize;
            }
        } else {
            *hpsize_min = getpagesize();
        }
    }

    return 0;
}

static long getrampagesize(void)
{
    long hpsize = LONG_MAX;
    Object *memdev_root;

    if (mem_path) {
        return gethugepagesize(mem_path);
    }

    /* it's possible we have memory-backend objects with
     * hugepage-backed RAM. these may get mapped into system
     * address space via -numa parameters or memory hotplug
     * hooks. we want to take these into account, but we
     * also want to make sure these supported hugepage
     * sizes are applicable across the entire range of memory
     * we may boot from, so we take the min across all
     * backends, and assume normal pages in cases where a
     * backend isn't backed by hugepages.
     */
    memdev_root = object_resolve_path("/objects", NULL);
    if (!memdev_root) {
        return getpagesize();
    }

    object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);

    return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
}

static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}

static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    /* If we have HV KVM, we need to forbid CI large pages if our
     * host page size is smaller than 64K.
     */
    if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
        env->ci_large_pages = getpagesize() >= 0x10000;
    }

    /*
     * XXX This loop should be an entry wide AND of the capabilities that
     *     the selected CPU has with the capabilities that KVM supports.
     */
    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

#endif /* !defined (TARGET_PPC64) */

unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
}

/* e500 supports 2 h/w breakpoints and 2 watchpoints.
 * book3s supports only 1 watchpoint, so array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/* By default no breakpoints or watchpoints are supported */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;

static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
{
    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        max_hw_breakpoint = 2;
        max_hw_watchpoint = 2;
    }

    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
        fprintf(stderr, "Error initializing h/w breakpoints\n");
        return;
    }
}

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        if (ret == -EINVAL) {
            error_report("Register sync failed... If you're using kvm-hv.ko,"
                         " only \"-cpu host\" is possible");
        }
        return ret;
    }

    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);
        break;
    default:
        break;
    }

    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
    kvmppc_hw_debug_points_init(cenv);

    return ret;
}

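/* Push QEMU's copy of the shared software TLB back to KVM by marking
 * every entry dirty via KVM_DIRTY_TLB. */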
static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}

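/* Read a single SPR from KVM through the ONE_REG interface into
 * env->spr[], handling both 32-bit and 64-bit register sizes. */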
static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_get(spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}

static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}

static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

#ifdef HOST_WORDS_BIGENDIAN
            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
#else
            vsr[0] = env->vsr[i];
            vsr[1] = float64_val(env->fpr[i]);
#endif
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
#ifdef HOST_WORDS_BIGENDIAN
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
#else
                env->fpr[i] = vsr[1];
                if (vsx) {
                    env->vsr[i] = vsr[0];
                }
#endif
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

9b00ea49
DG
780#if defined(TARGET_PPC64)
781static int kvm_get_vpa(CPUState *cs)
782{
783 PowerPCCPU *cpu = POWERPC_CPU(cs);
784 CPUPPCState *env = &cpu->env;
785 struct kvm_one_reg reg;
786 int ret;
787
788 reg.id = KVM_REG_PPC_VPA_ADDR;
789 reg.addr = (uintptr_t)&env->vpa_addr;
790 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
791 if (ret < 0) {
da56ff91 792 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
9b00ea49
DG
793 return ret;
794 }
795
796 assert((uintptr_t)&env->slb_shadow_size
797 == ((uintptr_t)&env->slb_shadow_addr + 8));
798 reg.id = KVM_REG_PPC_VPA_SLB;
799 reg.addr = (uintptr_t)&env->slb_shadow_addr;
800 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
801 if (ret < 0) {
da56ff91 802 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
9b00ea49
DG
803 strerror(errno));
804 return ret;
805 }
806
807 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
808 reg.id = KVM_REG_PPC_VPA_DTL;
809 reg.addr = (uintptr_t)&env->dtl_addr;
810 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
811 if (ret < 0) {
da56ff91 812 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
9b00ea49
DG
813 strerror(errno));
814 return ret;
815 }
816
817 return 0;
818}
819
820static int kvm_put_vpa(CPUState *cs)
821{
822 PowerPCCPU *cpu = POWERPC_CPU(cs);
823 CPUPPCState *env = &cpu->env;
824 struct kvm_one_reg reg;
825 int ret;
826
827 /* SLB shadow or DTL can't be registered unless a master VPA is
828 * registered. That means when restoring state, if a VPA *is*
829 * registered, we need to set that up first. If not, we need to
830 * deregister the others before deregistering the master VPA */
831 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
832
833 if (env->vpa_addr) {
834 reg.id = KVM_REG_PPC_VPA_ADDR;
835 reg.addr = (uintptr_t)&env->vpa_addr;
836 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
837 if (ret < 0) {
da56ff91 838 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
9b00ea49
DG
839 return ret;
840 }
841 }
842
843 assert((uintptr_t)&env->slb_shadow_size
844 == ((uintptr_t)&env->slb_shadow_addr + 8));
845 reg.id = KVM_REG_PPC_VPA_SLB;
846 reg.addr = (uintptr_t)&env->slb_shadow_addr;
847 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
848 if (ret < 0) {
da56ff91 849 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
9b00ea49
DG
850 return ret;
851 }
852
853 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
854 reg.id = KVM_REG_PPC_VPA_DTL;
855 reg.addr = (uintptr_t)&env->dtl_addr;
856 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
857 if (ret < 0) {
da56ff91 858 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
9b00ea49
DG
859 strerror(errno));
860 return ret;
861 }
862
863 if (!env->vpa_addr) {
864 reg.id = KVM_REG_PPC_VPA_ADDR;
865 reg.addr = (uintptr_t)&env->vpa_addr;
866 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
867 if (ret < 0) {
da56ff91 868 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
9b00ea49
DG
869 return ret;
870 }
871 }
872
873 return 0;
874}
875#endif /* TARGET_PPC64 */
876
e5c0d3ce 877int kvmppc_put_books_sregs(PowerPCCPU *cpu)
a7a00a72
DG
878{
879 CPUPPCState *env = &cpu->env;
880 struct kvm_sregs sregs;
881 int i;
882
883 sregs.pvr = env->spr[SPR_PVR];
884
885 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
886
887 /* Sync SLB */
888#ifdef TARGET_PPC64
889 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
890 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
891 if (env->slb[i].esid & SLB_ESID_V) {
892 sregs.u.s.ppc64.slb[i].slbe |= i;
893 }
894 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
895 }
896#endif
897
898 /* Sync SRs */
899 for (i = 0; i < 16; i++) {
900 sregs.u.s.ppc32.sr[i] = env->sr[i];
901 }
902
903 /* Sync BATs */
904 for (i = 0; i < 8; i++) {
905 /* Beware. We have to swap upper and lower bits here */
906 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
907 | env->DBAT[1][i];
908 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
909 | env->IBAT[1][i];
910 }
911
912 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
913}
914
20d695a9 915int kvm_arch_put_registers(CPUState *cs, int level)
d76d1650 916{
20d695a9
AF
917 PowerPCCPU *cpu = POWERPC_CPU(cs);
918 CPUPPCState *env = &cpu->env;
d76d1650
AJ
919 struct kvm_regs regs;
920 int ret;
921 int i;
922
1bc22652
AF
923 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
924 if (ret < 0) {
d76d1650 925 return ret;
1bc22652 926 }
d76d1650
AJ
927
928 regs.ctr = env->ctr;
929 regs.lr = env->lr;
da91a00f 930 regs.xer = cpu_read_xer(env);
d76d1650
AJ
931 regs.msr = env->msr;
932 regs.pc = env->nip;
933
934 regs.srr0 = env->spr[SPR_SRR0];
935 regs.srr1 = env->spr[SPR_SRR1];
936
937 regs.sprg0 = env->spr[SPR_SPRG0];
938 regs.sprg1 = env->spr[SPR_SPRG1];
939 regs.sprg2 = env->spr[SPR_SPRG2];
940 regs.sprg3 = env->spr[SPR_SPRG3];
941 regs.sprg4 = env->spr[SPR_SPRG4];
942 regs.sprg5 = env->spr[SPR_SPRG5];
943 regs.sprg6 = env->spr[SPR_SPRG6];
944 regs.sprg7 = env->spr[SPR_SPRG7];
945
90dc8812
SW
946 regs.pid = env->spr[SPR_BOOKE_PID];
947
d76d1650
AJ
948 for (i = 0;i < 32; i++)
949 regs.gpr[i] = env->gpr[i];
950
4bddaf55
AK
951 regs.cr = 0;
952 for (i = 0; i < 8; i++) {
953 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
954 }
955
1bc22652 956 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
d76d1650
AJ
957 if (ret < 0)
958 return ret;
959
70b79849
DG
960 kvm_put_fp(cs);
961
93dd5e85 962 if (env->tlb_dirty) {
1bc22652 963 kvm_sw_tlb_put(cpu);
93dd5e85
SW
964 env->tlb_dirty = false;
965 }
966
f1af19d7 967 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
a7a00a72
DG
968 ret = kvmppc_put_books_sregs(cpu);
969 if (ret < 0) {
f1af19d7
DG
970 return ret;
971 }
972 }
973
974 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
d67d40ea
DG
975 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
976 }
f1af19d7 977
d67d40ea
DG
978 if (cap_one_reg) {
979 int i;
980
981 /* We deliberately ignore errors here, for kernels which have
982 * the ONE_REG calls, but don't support the specific
983 * registers, there's a reasonable chance things will still
984 * work, at least until we try to migrate. */
985 for (i = 0; i < 1024; i++) {
986 uint64_t id = env->spr_cb[i].one_reg_id;
987
988 if (id != 0) {
989 kvm_put_one_spr(cs, id, i);
990 }
f1af19d7 991 }
9b00ea49
DG
992
993#ifdef TARGET_PPC64
80b3f79b
AK
994 if (msr_ts) {
995 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
996 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
997 }
998 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
999 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1000 }
1001 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1002 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1003 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1004 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1005 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1006 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1007 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1008 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1009 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1010 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1011 }
1012
9b00ea49
DG
1013 if (cap_papr) {
1014 if (kvm_put_vpa(cs) < 0) {
da56ff91 1015 DPRINTF("Warning: Unable to set VPA information to KVM\n");
9b00ea49
DG
1016 }
1017 }
98a8b524
AK
1018
1019 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
9b00ea49 1020#endif /* TARGET_PPC64 */
f1af19d7
DG
1021 }
1022
d76d1650
AJ
1023 return ret;
1024}
1025
c371c2e3
BB
1026static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1027{
1028 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1029}
1030
a7a00a72
DG
1031static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1032{
1033 CPUPPCState *env = &cpu->env;
1034 struct kvm_sregs sregs;
1035 int ret;
1036
1037 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1038 if (ret < 0) {
1039 return ret;
1040 }
1041
1042 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1043 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1044 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1045 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1046 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1047 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1048 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1049 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1050 env->spr[SPR_DECR] = sregs.u.e.dec;
1051 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1052 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1053 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1054 }
1055
1056 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1057 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1058 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1059 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1060 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1061 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1062 }
1063
1064 if (sregs.u.e.features & KVM_SREGS_E_64) {
1065 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1066 }
1067
1068 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1069 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1070 }
1071
1072 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1073 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1074 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1075 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1076 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1077 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1078 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1079 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1080 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1081 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1082 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1083 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1084 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1085 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1086 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1087 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1088 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1089 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1090 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1091 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1092 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1093 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1094 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1095 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1096 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1097 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1098 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1099 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1100 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1101 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1102 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1103 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1104 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1105
1106 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1107 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1108 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1109 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1110 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1111 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1112 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1113 }
1114
1115 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1116 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1117 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1118 }
1119
1120 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1121 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1122 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1123 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1124 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1125 }
1126 }
1127
1128 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1129 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1130 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1131 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1132 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1133 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1134 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1135 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1136 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1137 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1138 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1139 }
1140
1141 if (sregs.u.e.features & KVM_SREGS_EXP) {
1142 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1143 }
1144
1145 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1146 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1147 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1148 }
1149
1150 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1151 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1152 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1153 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1154
1155 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1156 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1157 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1158 }
1159 }
1160
1161 return 0;
1162}
1163
1164static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1165{
1166 CPUPPCState *env = &cpu->env;
1167 struct kvm_sregs sregs;
1168 int ret;
1169 int i;
1170
1171 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1172 if (ret < 0) {
1173 return ret;
1174 }
1175
1176 if (!env->external_htab) {
1177 ppc_store_sdr1(env, sregs.u.s.sdr1);
1178 }
1179
1180 /* Sync SLB */
1181#ifdef TARGET_PPC64
1182 /*
1183 * The packed SLB array we get from KVM_GET_SREGS only contains
1184 * information about valid entries. So we flush our internal copy
1185 * to get rid of stale ones, then put all valid SLB entries back
1186 * in.
1187 */
1188 memset(env->slb, 0, sizeof(env->slb));
1189 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1190 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1191 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1192 /*
1193 * Only restore valid entries
1194 */
1195 if (rb & SLB_ESID_V) {
1196 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1197 }
1198 }
1199#endif
1200
1201 /* Sync SRs */
1202 for (i = 0; i < 16; i++) {
1203 env->sr[i] = sregs.u.s.ppc32.sr[i];
1204 }
1205
1206 /* Sync BATs */
1207 for (i = 0; i < 8; i++) {
1208 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1209 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1210 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1211 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1212 }
1213
1214 return 0;
1215}
1216
20d695a9 1217int kvm_arch_get_registers(CPUState *cs)
d76d1650 1218{
20d695a9
AF
1219 PowerPCCPU *cpu = POWERPC_CPU(cs);
1220 CPUPPCState *env = &cpu->env;
d76d1650 1221 struct kvm_regs regs;
90dc8812 1222 uint32_t cr;
138b38b6 1223 int i, ret;
d76d1650 1224
1bc22652 1225 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
d76d1650
AJ
1226 if (ret < 0)
1227 return ret;
1228
90dc8812
SW
1229 cr = regs.cr;
1230 for (i = 7; i >= 0; i--) {
1231 env->crf[i] = cr & 15;
1232 cr >>= 4;
1233 }
ba5e5090 1234
d76d1650
AJ
1235 env->ctr = regs.ctr;
1236 env->lr = regs.lr;
da91a00f 1237 cpu_write_xer(env, regs.xer);
d76d1650
AJ
1238 env->msr = regs.msr;
1239 env->nip = regs.pc;
1240
1241 env->spr[SPR_SRR0] = regs.srr0;
1242 env->spr[SPR_SRR1] = regs.srr1;
1243
1244 env->spr[SPR_SPRG0] = regs.sprg0;
1245 env->spr[SPR_SPRG1] = regs.sprg1;
1246 env->spr[SPR_SPRG2] = regs.sprg2;
1247 env->spr[SPR_SPRG3] = regs.sprg3;
1248 env->spr[SPR_SPRG4] = regs.sprg4;
1249 env->spr[SPR_SPRG5] = regs.sprg5;
1250 env->spr[SPR_SPRG6] = regs.sprg6;
1251 env->spr[SPR_SPRG7] = regs.sprg7;
1252
90dc8812
SW
1253 env->spr[SPR_BOOKE_PID] = regs.pid;
1254
d76d1650
AJ
1255 for (i = 0;i < 32; i++)
1256 env->gpr[i] = regs.gpr[i];
1257
70b79849
DG
1258 kvm_get_fp(cs);
1259
90dc8812 1260 if (cap_booke_sregs) {
a7a00a72 1261 ret = kvmppc_get_booke_sregs(cpu);
90dc8812
SW
1262 if (ret < 0) {
1263 return ret;
1264 }
fafc0b6a 1265 }
90dc8812 1266
90dc8812 1267 if (cap_segstate) {
a7a00a72 1268 ret = kvmppc_get_books_sregs(cpu);
90dc8812
SW
1269 if (ret < 0) {
1270 return ret;
1271 }
fafc0b6a 1272 }
ba5e5090 1273
d67d40ea
DG
1274 if (cap_hior) {
1275 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1276 }
1277
1278 if (cap_one_reg) {
1279 int i;
1280
1281 /* We deliberately ignore errors here, for kernels which have
1282 * the ONE_REG calls, but don't support the specific
1283 * registers, there's a reasonable chance things will still
1284 * work, at least until we try to migrate. */
1285 for (i = 0; i < 1024; i++) {
1286 uint64_t id = env->spr_cb[i].one_reg_id;
1287
1288 if (id != 0) {
1289 kvm_get_one_spr(cs, id, i);
1290 }
1291 }
9b00ea49
DG
1292
1293#ifdef TARGET_PPC64
80b3f79b
AK
1294 if (msr_ts) {
1295 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1296 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1297 }
1298 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1299 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1300 }
1301 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1302 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1303 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1304 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1305 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1306 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1307 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1308 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1309 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1310 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1311 }
1312
9b00ea49
DG
1313 if (cap_papr) {
1314 if (kvm_get_vpa(cs) < 0) {
da56ff91 1315 DPRINTF("Warning: Unable to get VPA information from KVM\n");
9b00ea49
DG
1316 }
1317 }
98a8b524
AK
1318
1319 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
9b00ea49 1320#endif
d67d40ea
DG
1321 }
1322
d76d1650
AJ
1323 return 0;
1324}
1325
1bc22652 1326int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
fc87e185
AG
1327{
1328 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1329
1330 if (irq != PPC_INTERRUPT_EXT) {
1331 return 0;
1332 }
1333
1334 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1335 return 0;
1336 }
1337
1bc22652 1338 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
fc87e185
AG
1339
1340 return 0;
1341}
1342
16415335
AG
1343#if defined(TARGET_PPCEMB)
1344#define PPC_INPUT_INT PPC40x_INPUT_INT
1345#elif defined(TARGET_PPC64)
1346#define PPC_INPUT_INT PPC970_INPUT_INT
1347#else
1348#define PPC_INPUT_INT PPC6xx_INPUT_INT
1349#endif
1350
20d695a9 1351void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
d76d1650 1352{
20d695a9
AF
1353 PowerPCCPU *cpu = POWERPC_CPU(cs);
1354 CPUPPCState *env = &cpu->env;
d76d1650
AJ
1355 int r;
1356 unsigned irq;
1357
4b8523ee
JK
1358 qemu_mutex_lock_iothread();
1359
5cbdb3a3 1360 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
d76d1650 1361 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
fc87e185
AG
1362 if (!cap_interrupt_level &&
1363 run->ready_for_interrupt_injection &&
259186a7 1364 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
16415335 1365 (env->irq_input_state & (1<<PPC_INPUT_INT)))
d76d1650
AJ
1366 {
1367 /* For now KVM disregards the 'irq' argument. However, in the
1368 * future KVM could cache it in-kernel to avoid a heavyweight exit
1369 * when reading the UIC.
1370 */
fc87e185 1371 irq = KVM_INTERRUPT_SET;
d76d1650 1372
da56ff91 1373 DPRINTF("injected interrupt %d\n", irq);
1bc22652 1374 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
55e5c285
AF
1375 if (r < 0) {
1376 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1377 }
c821c2bd
AG
1378
1379 /* Always wake up soon in case the interrupt was level based */
bc72ad67 1380 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 1381 (NANOSECONDS_PER_SECOND / 50));
d76d1650
AJ
1382 }
1383
1384 /* We don't know if there are more interrupts pending after this. However,
1385 * the guest will return to userspace in the course of handling this one
1386 * anyways, so we will get a chance to deliver the rest. */
4b8523ee
JK
1387
1388 qemu_mutex_unlock_iothread();
d76d1650
AJ
1389}
1390
4c663752 1391MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
d76d1650 1392{
4c663752 1393 return MEMTXATTRS_UNSPECIFIED;
d76d1650
AJ
1394}
1395
20d695a9 1396int kvm_arch_process_async_events(CPUState *cs)
0af691d7 1397{
259186a7 1398 return cs->halted;
0af691d7
MT
1399}
1400
259186a7 1401static int kvmppc_handle_halt(PowerPCCPU *cpu)
d76d1650 1402{
259186a7
AF
1403 CPUState *cs = CPU(cpu);
1404 CPUPPCState *env = &cpu->env;
1405
1406 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1407 cs->halted = 1;
27103424 1408 cs->exception_index = EXCP_HLT;
d76d1650
AJ
1409 }
1410
bb4ea393 1411 return 0;
d76d1650
AJ
1412}
1413
1414/* map dcr access to existing qemu dcr emulation */
1328c2bf 1415static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
d76d1650
AJ
1416{
1417 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1418 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1419
bb4ea393 1420 return 0;
d76d1650
AJ
1421}
1422
1328c2bf 1423static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
d76d1650
AJ
1424{
1425 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1426 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1427
bb4ea393 1428 return 0;
d76d1650
AJ
1429}
1430
8a0548f9
BB
1431int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1432{
1433 /* Mixed endian case is not handled */
1434 uint32_t sc = debug_inst_opcode;
1435
1436 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1437 sizeof(sc), 0) ||
1438 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1439 return -EINVAL;
1440 }
1441
1442 return 0;
1443}
1444
1445int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1446{
1447 uint32_t sc;
1448
1449 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1450 sc != debug_inst_opcode ||
1451 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1452 sizeof(sc), 1)) {
1453 return -EINVAL;
1454 }
1455
1456 return 0;
1457}
1458
88365d17
BB
1459static int find_hw_breakpoint(target_ulong addr, int type)
1460{
1461 int n;
1462
1463 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1464 <= ARRAY_SIZE(hw_debug_points));
1465
1466 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1467 if (hw_debug_points[n].addr == addr &&
1468 hw_debug_points[n].type == type) {
1469 return n;
1470 }
1471 }
1472
1473 return -1;
1474}
1475
1476static int find_hw_watchpoint(target_ulong addr, int *flag)
1477{
1478 int n;
1479
1480 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1481 if (n >= 0) {
1482 *flag = BP_MEM_ACCESS;
1483 return n;
1484 }
1485
1486 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1487 if (n >= 0) {
1488 *flag = BP_MEM_WRITE;
1489 return n;
1490 }
1491
1492 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1493 if (n >= 0) {
1494 *flag = BP_MEM_READ;
1495 return n;
1496 }
1497
1498 return -1;
1499}
1500
1501int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1502 target_ulong len, int type)
1503{
1504 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1505 return -ENOBUFS;
1506 }
1507
1508 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1509 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1510
1511 switch (type) {
1512 case GDB_BREAKPOINT_HW:
1513 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1514 return -ENOBUFS;
1515 }
1516
1517 if (find_hw_breakpoint(addr, type) >= 0) {
1518 return -EEXIST;
1519 }
1520
1521 nb_hw_breakpoint++;
1522 break;
1523
1524 case GDB_WATCHPOINT_WRITE:
1525 case GDB_WATCHPOINT_READ:
1526 case GDB_WATCHPOINT_ACCESS:
1527 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1528 return -ENOBUFS;
1529 }
1530
1531 if (find_hw_breakpoint(addr, type) >= 0) {
1532 return -EEXIST;
1533 }
1534
1535 nb_hw_watchpoint++;
1536 break;
1537
1538 default:
1539 return -ENOSYS;
1540 }
1541
1542 return 0;
1543}
1544
1545int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1546 target_ulong len, int type)
1547{
1548 int n;
1549
1550 n = find_hw_breakpoint(addr, type);
1551 if (n < 0) {
1552 return -ENOENT;
1553 }
1554
1555 switch (type) {
1556 case GDB_BREAKPOINT_HW:
1557 nb_hw_breakpoint--;
1558 break;
1559
1560 case GDB_WATCHPOINT_WRITE:
1561 case GDB_WATCHPOINT_READ:
1562 case GDB_WATCHPOINT_ACCESS:
1563 nb_hw_watchpoint--;
1564 break;
1565
1566 default:
1567 return -ENOSYS;
1568 }
1569 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1570
1571 return 0;
1572}
1573
1574void kvm_arch_remove_all_hw_breakpoints(void)
1575{
1576 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1577}
1578
8a0548f9
BB
1579void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1580{
88365d17
BB
1581 int n;
1582
8a0548f9
BB
1583 /* Software Breakpoint updates */
1584 if (kvm_sw_breakpoints_active(cs)) {
1585 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1586 }
88365d17
BB
1587
1588 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1589 <= ARRAY_SIZE(hw_debug_points));
1590 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1591
1592 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1593 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1594 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1595 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1596 switch (hw_debug_points[n].type) {
1597 case GDB_BREAKPOINT_HW:
1598 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1599 break;
1600 case GDB_WATCHPOINT_WRITE:
1601 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1602 break;
1603 case GDB_WATCHPOINT_READ:
1604 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1605 break;
1606 case GDB_WATCHPOINT_ACCESS:
1607 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1608 KVMPPC_DEBUG_WATCH_READ;
1609 break;
1610 default:
1611 cpu_abort(cs, "Unsupported breakpoint type\n");
1612 }
1613 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1614 }
1615 }
8a0548f9
BB
1616}
1617
1618static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1619{
1620 CPUState *cs = CPU(cpu);
1621 CPUPPCState *env = &cpu->env;
1622 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1623 int handle = 0;
88365d17
BB
1624 int n;
1625 int flag = 0;
8a0548f9 1626
88365d17
BB
1627 if (cs->singlestep_enabled) {
1628 handle = 1;
1629 } else if (arch_info->status) {
1630 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1631 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1632 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1633 if (n >= 0) {
1634 handle = 1;
1635 }
1636 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1637 KVMPPC_DEBUG_WATCH_WRITE)) {
1638 n = find_hw_watchpoint(arch_info->address, &flag);
1639 if (n >= 0) {
1640 handle = 1;
1641 cs->watchpoint_hit = &hw_watchpoint;
1642 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1643 hw_watchpoint.flags = flag;
1644 }
1645 }
1646 }
1647 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
8a0548f9
BB
1648 handle = 1;
1649 } else {
1650 /* QEMU is not able to handle debug exception, so inject
1651 * program exception to guest;
1652 * Yes program exception NOT debug exception !!
88365d17
BB
1653 * When QEMU is using debug resources then debug exception must
1654 * be always set. To achieve this we set MSR_DE and also set
1655 * MSRP_DEP so guest cannot change MSR_DE.
1656 * When emulating debug resource for guest we want guest
1657 * to control MSR_DE (enable/disable debug interrupt on need).
1658 * Supporting both configurations are NOT possible.
1659 * So the result is that we cannot share debug resources
1660 * between QEMU and Guest on BOOKE architecture.
1661 * In the current design QEMU gets the priority over guest,
1662 * this means that if QEMU is using debug resources then guest
1663 * cannot use them;
8a0548f9
BB
1664 * For software breakpoint QEMU uses a privileged instruction;
1665 * So there cannot be any reason that we are here for guest
1666 * set debug exception, only possibility is guest executed a
1667 * privileged / illegal instruction and that's why we are
1668 * injecting a program interrupt.
1669 */
1670
1671 cpu_synchronize_state(cs);
1672 /* env->nip is PC, so increment this by 4 to use
1673 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1674 */
1675 env->nip += 4;
1676 cs->exception_index = POWERPC_EXCP_PROGRAM;
1677 env->error_code = POWERPC_EXCP_INVAL;
1678 ppc_cpu_do_interrupt(cs);
1679 }
1680
1681 return handle;
1682}
1683
20d695a9 1684int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
d76d1650 1685{
20d695a9
AF
1686 PowerPCCPU *cpu = POWERPC_CPU(cs);
1687 CPUPPCState *env = &cpu->env;
bb4ea393 1688 int ret;
d76d1650 1689
4b8523ee
JK
1690 qemu_mutex_lock_iothread();
1691
d76d1650
AJ
1692 switch (run->exit_reason) {
1693 case KVM_EXIT_DCR:
1694 if (run->dcr.is_write) {
da56ff91 1695 DPRINTF("handle dcr write\n");
d76d1650
AJ
1696 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1697 } else {
da56ff91 1698 DPRINTF("handle dcr read\n");
d76d1650
AJ
1699 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1700 }
1701 break;
1702 case KVM_EXIT_HLT:
da56ff91 1703 DPRINTF("handle halt\n");
259186a7 1704 ret = kvmppc_handle_halt(cpu);
d76d1650 1705 break;
c6304a4a 1706#if defined(TARGET_PPC64)
f61b4bed 1707 case KVM_EXIT_PAPR_HCALL:
da56ff91 1708 DPRINTF("handle PAPR hypercall\n");
20d695a9 1709 run->papr_hcall.ret = spapr_hypercall(cpu,
aa100fa4 1710 run->papr_hcall.nr,
f61b4bed 1711 run->papr_hcall.args);
78e8fde2 1712 ret = 0;
f61b4bed
AG
1713 break;
1714#endif
5b95b8b9 1715 case KVM_EXIT_EPR:
da56ff91 1716 DPRINTF("handle epr\n");
933b19ea 1717 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
5b95b8b9
AG
1718 ret = 0;
1719 break;
31f2cb8f 1720 case KVM_EXIT_WATCHDOG:
da56ff91 1721 DPRINTF("handle watchdog expiry\n");
31f2cb8f
BB
1722 watchdog_perform_action();
1723 ret = 0;
1724 break;
1725
8a0548f9
BB
1726 case KVM_EXIT_DEBUG:
1727 DPRINTF("handle debug exception\n");
1728 if (kvm_handle_debug(cpu, run)) {
1729 ret = EXCP_DEBUG;
1730 break;
1731 }
1732 /* re-enter, this exception was guest-internal */
1733 ret = 0;
1734 break;
1735
73aaec4a
JK
1736 default:
1737 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1738 ret = -1;
1739 break;
d76d1650
AJ
1740 }
1741
4b8523ee 1742 qemu_mutex_unlock_iothread();
d76d1650
AJ
1743 return ret;
1744}
1745
31f2cb8f
BB
1746int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1747{
1748 CPUState *cs = CPU(cpu);
1749 uint32_t bits = tsr_bits;
1750 struct kvm_one_reg reg = {
1751 .id = KVM_REG_PPC_OR_TSR,
1752 .addr = (uintptr_t) &bits,
1753 };
1754
1755 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1756}
1757
1758int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1759{
1760
1761 CPUState *cs = CPU(cpu);
1762 uint32_t bits = tsr_bits;
1763 struct kvm_one_reg reg = {
1764 .id = KVM_REG_PPC_CLEAR_TSR,
1765 .addr = (uintptr_t) &bits,
1766 };
1767
1768 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1769}
1770
1771int kvmppc_set_tcr(PowerPCCPU *cpu)
1772{
1773 CPUState *cs = CPU(cpu);
1774 CPUPPCState *env = &cpu->env;
1775 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1776
1777 struct kvm_one_reg reg = {
1778 .id = KVM_REG_PPC_TCR,
1779 .addr = (uintptr_t) &tcr,
1780 };
1781
1782 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1783}
1784
1785int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1786{
1787 CPUState *cs = CPU(cpu);
31f2cb8f
BB
1788 int ret;
1789
1790 if (!kvm_enabled()) {
1791 return -1;
1792 }
1793
1794 if (!cap_ppc_watchdog) {
1795 printf("warning: KVM does not support watchdog");
1796 return -1;
1797 }
1798
48add816 1799 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
31f2cb8f
BB
1800 if (ret < 0) {
1801 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1802 __func__, strerror(-ret));
1803 return ret;
1804 }
1805
1806 return ret;
1807}
1808
dc333cd6
AG
1809static int read_cpuinfo(const char *field, char *value, int len)
1810{
1811 FILE *f;
1812 int ret = -1;
1813 int field_len = strlen(field);
1814 char line[512];
1815
1816 f = fopen("/proc/cpuinfo", "r");
1817 if (!f) {
1818 return -1;
1819 }
1820
1821 do {
ef951443 1822 if (!fgets(line, sizeof(line), f)) {
dc333cd6
AG
1823 break;
1824 }
1825 if (!strncmp(line, field, field_len)) {
ae215068 1826 pstrcpy(value, len, line);
dc333cd6
AG
1827 ret = 0;
1828 break;
1829 }
1830 } while(*line);
1831
1832 fclose(f);
1833
1834 return ret;
1835}
1836
1837uint32_t kvmppc_get_tbfreq(void)
1838{
1839 char line[512];
1840 char *ns;
73bcb24d 1841 uint32_t retval = NANOSECONDS_PER_SECOND;
dc333cd6
AG
1842
1843 if (read_cpuinfo("timebase", line, sizeof(line))) {
1844 return retval;
1845 }
1846
1847 if (!(ns = strchr(line, ':'))) {
1848 return retval;
1849 }
1850
1851 ns++;
1852
f9b8e7f6 1853 return atoi(ns);
dc333cd6 1854}
4513d923 1855
ef951443
ND
1856bool kvmppc_get_host_serial(char **value)
1857{
1858 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1859 NULL);
1860}
1861
1862bool kvmppc_get_host_model(char **value)
1863{
1864 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1865}
1866
eadaada1
AG
1867/* Try to find a device tree node for a CPU with clock-frequency property */
1868static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1869{
1870 struct dirent *dirp;
1871 DIR *dp;
1872
1873 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1874 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1875 return -1;
1876 }
1877
1878 buf[0] = '\0';
1879 while ((dirp = readdir(dp)) != NULL) {
1880 FILE *f;
1881 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1882 dirp->d_name);
1883 f = fopen(buf, "r");
1884 if (f) {
1885 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1886 fclose(f);
1887 break;
1888 }
1889 buf[0] = '\0';
1890 }
1891 closedir(dp);
1892 if (buf[0] == '\0') {
1893 printf("Unknown host!\n");
1894 return -1;
1895 }
1896
1897 return 0;
1898}
1899
7d94a30b 1900static uint64_t kvmppc_read_int_dt(const char *filename)
eadaada1 1901{
9bc884b7
DG
1902 union {
1903 uint32_t v32;
1904 uint64_t v64;
1905 } u;
eadaada1
AG
1906 FILE *f;
1907 int len;
1908
7d94a30b 1909 f = fopen(filename, "rb");
eadaada1
AG
1910 if (!f) {
1911 return -1;
1912 }
1913
9bc884b7 1914 len = fread(&u, 1, sizeof(u), f);
eadaada1
AG
1915 fclose(f);
1916 switch (len) {
9bc884b7
DG
1917 case 4:
1918 /* property is a 32-bit quantity */
1919 return be32_to_cpu(u.v32);
1920 case 8:
1921 return be64_to_cpu(u.v64);
eadaada1
AG
1922 }
1923
1924 return 0;
1925}
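/*
 * For reference, not part of the original file: flattened device tree
 * properties are stored big-endian, which is why the 4- and 8-byte cases
 * above go through be32_to_cpu()/be64_to_cpu().  Any other length is
 * treated as "not a simple integer" and reported as 0.
 */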
1926
7d94a30b
SB
1927/* Read a CPU node property from the host device tree that's a single
1928  * integer (32-bit or 64-bit).  Returns 0 if the property has an
1929  * unexpected length, or (uint64_t)-1 if the CPU node or the
1930  * property can't be found or opened. */
1931static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1932{
1933 char buf[PATH_MAX], *tmp;
1934 uint64_t val;
1935
1936 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1937 return -1;
1938 }
1939
1940 tmp = g_strdup_printf("%s/%s", buf, propname);
1941 val = kvmppc_read_int_dt(tmp);
1942 g_free(tmp);
1943
1944 return val;
1945}
1946
9bc884b7
DG
1947uint64_t kvmppc_get_clockfreq(void)
1948{
1949 return kvmppc_read_int_cpu_dt("clock-frequency");
1950}
1951
6659394f
DG
1952uint32_t kvmppc_get_vmx(void)
1953{
1954 return kvmppc_read_int_cpu_dt("ibm,vmx");
1955}
1956
1957uint32_t kvmppc_get_dfp(void)
1958{
1959 return kvmppc_read_int_cpu_dt("ibm,dfp");
1960}
1961
1a61a9ae
SY
1962static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1963 {
1964 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1965 CPUState *cs = CPU(cpu);
1966
6fd33a75 1967 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1a61a9ae
SY
1968 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1969 return 0;
1970 }
1971
1972 return 1;
1973}
1974
1975int kvmppc_get_hasidle(CPUPPCState *env)
1976{
1977 struct kvm_ppc_pvinfo pvinfo;
1978
1979 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1980 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1981 return 1;
1982 }
1983
1984 return 0;
1985}
1986
1328c2bf 1987int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
45024f09
AG
1988{
1989 uint32_t *hc = (uint32_t*)buf;
45024f09
AG
1990 struct kvm_ppc_pvinfo pvinfo;
1991
1a61a9ae 1992 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
45024f09 1993 memcpy(buf, pvinfo.hcall, buf_len);
45024f09
AG
1994 return 0;
1995 }
45024f09
AG
1996
1997 /*
d13fc32e 1998 * Fallback to always fail hypercalls regardless of endianness:
45024f09 1999 *
d13fc32e 2000 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
45024f09 2001 * li r3, -1
d13fc32e
AG
2002 * b .+8 (becomes nop in wrong endian)
2003 * bswap32(li r3, -1)
45024f09
AG
2004 */
2005
d13fc32e
AG
2006 hc[0] = cpu_to_be32(0x08000048);
2007 hc[1] = cpu_to_be32(0x3860ffff);
2008 hc[2] = cpu_to_be32(0x48000008);
2009 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
45024f09 2010
0ddbd053 2011 return 1;
45024f09
AG
2012}
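/*
 * Illustrative sketch, not part of the original file: a caller passes a
 * buffer with room for the four 32-bit instructions filled in above
 * (16 bytes).  The return value only distinguishes the KVM-provided
 * hypercall sequence (0) from the always-failing fallback (non-zero); the
 * buffer is usable either way.  example_fetch_hcall_blob() is hypothetical.
 */
static void example_fetch_hcall_blob(CPUPPCState *env)
{
    uint8_t hypercall[16];

    if (kvmppc_get_hypercall(env, hypercall, sizeof(hypercall))) {
        /* fallback sequence: guest hypercalls will simply return -1 */
    }
    /* hypercall[] can now be exposed to the guest, e.g. via the device tree */
}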
2013
026bfd89
DG
2014static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2015{
2016 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2017}
2018
2019void kvmppc_enable_logical_ci_hcalls(void)
2020{
2021 /*
2022      * FIXME: it would be nice if we could detect the case where a
2023      * device needs the in-kernel implementation of these hcalls but
2024      * the kernel lacks it, and produce a warning when that
2025      * happens.
2026 */
2027 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2028 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2029}
2030
ef9971dd
AK
2031void kvmppc_enable_set_mode_hcall(void)
2032{
2033 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2034}
2035
1bc22652 2036void kvmppc_set_papr(PowerPCCPU *cpu)
f61b4bed 2037{
1bc22652 2038 CPUState *cs = CPU(cpu);
f61b4bed
AG
2039 int ret;
2040
48add816 2041 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
f61b4bed 2042 if (ret) {
072ed5f2
TH
2043 error_report("This vCPU type or KVM version does not support PAPR");
2044 exit(1);
94135e81 2045 }
9b00ea49
DG
2046
2047 /* Update the capability flag so we sync the right information
2048 * with kvm */
2049 cap_papr = 1;
f61b4bed
AG
2050}
2051
6db5bb0f
AK
2052int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2053{
2054 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2055}
2056
5b95b8b9
AG
2057void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2058{
5b95b8b9 2059 CPUState *cs = CPU(cpu);
5b95b8b9
AG
2060 int ret;
2061
48add816 2062 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
5b95b8b9 2063 if (ret && mpic_proxy) {
072ed5f2
TH
2064 error_report("This KVM version does not support EPR");
2065 exit(1);
5b95b8b9
AG
2066 }
2067}
2068
e97c3636
DG
2069int kvmppc_smt_threads(void)
2070{
2071 return cap_ppc_smt ? cap_ppc_smt : 1;
2072}
2073
7f763a5d 2074#ifdef TARGET_PPC64
658fa66b 2075off_t kvmppc_alloc_rma(void **rma)
354ac20a 2076{
354ac20a
DG
2077 off_t size;
2078 int fd;
2079 struct kvm_allocate_rma ret;
354ac20a
DG
2080
2081     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported;
2082      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2083      * not necessary on this hardware;
2084      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware.
2085 *
2086 * FIXME: We should allow the user to force contiguous RMA
2087 * allocation in the cap_ppc_rma==1 case.
2088 */
2089 if (cap_ppc_rma < 2) {
2090 return 0;
2091 }
2092
2093 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2094 if (fd < 0) {
2095 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2096 strerror(errno));
2097 return -1;
2098 }
2099
2100 size = MIN(ret.rma_size, 256ul << 20);
2101
658fa66b
AK
2102 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2103 if (*rma == MAP_FAILED) {
354ac20a
DG
2104 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2105 return -1;
2106     }
2107
354ac20a
DG
2108 return size;
2109}
2110
7f763a5d
DG
2111uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2112{
f36951c1
DG
2113 struct kvm_ppc_smmu_info info;
2114 long rampagesize, best_page_shift;
2115 int i;
2116
7f763a5d
DG
2117 if (cap_ppc_rma >= 2) {
2118 return current_size;
2119 }
f36951c1
DG
2120
2121 /* Find the largest hardware supported page size that's less than
2122 * or equal to the (logical) backing page size of guest RAM */
182735ef 2123 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
f36951c1
DG
2124 rampagesize = getrampagesize();
2125 best_page_shift = 0;
2126
2127 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2128 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2129
2130 if (!sps->page_shift) {
2131 continue;
2132 }
2133
2134 if ((sps->page_shift > best_page_shift)
2135 && ((1UL << sps->page_shift) <= rampagesize)) {
2136 best_page_shift = sps->page_shift;
2137 }
2138 }
2139
7f763a5d 2140 return MIN(current_size,
f36951c1 2141 1ULL << (best_page_shift + hash_shift - 7));
7f763a5d
DG
2142}
2143#endif
2144
da95324e
AK
2145bool kvmppc_spapr_use_multitce(void)
2146{
2147 return cap_spapr_multitce;
2148}
2149
9bb62a07 2150void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
6a81dd17 2151 bool need_vfio)
0f5cb298
DG
2152{
2153 struct kvm_create_spapr_tce args = {
2154 .liobn = liobn,
2155 .window_size = window_size,
2156 };
2157 long len;
2158 int fd;
2159 void *table;
2160
b5aec396
DG
2161 /* Must set fd to -1 so we don't try to munmap when called for
2162 * destroying the table, which the upper layers -will- do
2163 */
2164 *pfd = -1;
6a81dd17 2165 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
0f5cb298
DG
2166 return NULL;
2167 }
2168
2169 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2170 if (fd < 0) {
b5aec396
DG
2171 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2172 liobn);
0f5cb298
DG
2173 return NULL;
2174 }
2175
a83000f5 2176 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
0f5cb298
DG
2177 /* FIXME: round this up to page size */
2178
74b41e56 2179 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
0f5cb298 2180 if (table == MAP_FAILED) {
b5aec396
DG
2181 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2182 liobn);
0f5cb298
DG
2183 close(fd);
2184 return NULL;
2185 }
2186
2187 *pfd = fd;
2188 return table;
2189}
2190
523e7b8a 2191int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
0f5cb298
DG
2192{
2193 long len;
2194
2195 if (fd < 0) {
2196 return -1;
2197 }
2198
523e7b8a 2199 len = nb_table * sizeof(uint64_t);
0f5cb298
DG
2200 if ((munmap(table, len) < 0) ||
2201 (close(fd) < 0)) {
b5aec396
DG
2202 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2203 strerror(errno));
0f5cb298
DG
2204 /* Leak the table */
2205 }
2206
2207 return 0;
2208}
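/*
 * Illustrative sketch, not part of the original file: the create/remove
 * pair above is meant to be used symmetrically.  For the munmap() length
 * in kvmppc_remove_spapr_tce() to match the mmap() length chosen in
 * kvmppc_create_spapr_tce(), nb_table must equal
 * window_size / SPAPR_TCE_PAGE_SIZE.  The liobn and window_size values
 * below are arbitrary examples.
 */
static void example_tce_table_lifecycle(void)
{
    uint32_t liobn = 0x80000000;
    uint32_t window_size = 0x10000000;  /* 256 MiB DMA window */
    void *table;
    int fd;

    table = kvmppc_create_spapr_tce(liobn, window_size, &fd, false);
    if (!table) {
        return;                         /* fall back to userspace TCEs */
    }

    /* ... hand the table over to the sPAPR TCE device emulation ... */

    kvmppc_remove_spapr_tce(table, fd, window_size / SPAPR_TCE_PAGE_SIZE);
}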
2209
7f763a5d
DG
2210int kvmppc_reset_htab(int shift_hint)
2211{
2212 uint32_t shift = shift_hint;
2213
ace9a2cb
DG
2214 if (!kvm_enabled()) {
2215 /* Full emulation, tell caller to allocate htab itself */
2216 return 0;
2217 }
2218 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
7f763a5d
DG
2219 int ret;
2220 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
ace9a2cb
DG
2221 if (ret == -ENOTTY) {
2222 /* At least some versions of PR KVM advertise the
2223 * capability, but don't implement the ioctl(). Oops.
2224 * Return 0 so that we allocate the htab in qemu, as is
2225 * correct for PR. */
2226 return 0;
2227 } else if (ret < 0) {
7f763a5d
DG
2228 return ret;
2229 }
2230 return shift;
2231 }
2232
ace9a2cb
DG
2233     /* We have a kernel that predates the htab reset calls.  For PR
2234      * KVM, we need to allocate the htab ourselves; for HV KVM of
2235      * this era, the kernel has already allocated a fixed 16MB hash
2236      * table.  Kernels of this era advertise the GET_PVINFO
2237      * capability only on PR, so we use that as a hack to determine
2238      * the right answer */
2239 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2240 /* PR - tell caller to allocate htab */
2241 return 0;
2242 } else {
2243 /* HV - assume 16MB kernel allocated htab */
2244 return 24;
2245 }
7f763a5d
DG
2246}
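/*
 * Illustrative sketch, not part of the original file: how a caller would
 * interpret kvmppc_reset_htab()'s return value.  A negative value is an
 * error, 0 means QEMU must allocate the hash table itself (TCG or PR KVM),
 * and a positive value is the log2 of the size of the hash table that the
 * kernel already manages (e.g. 24 for the 16MB case above).
 */
static int64_t example_htab_size_bytes(int shift_hint)
{
    int shift = kvmppc_reset_htab(shift_hint);

    if (shift <= 0) {
        return shift;               /* error, or "allocate it in QEMU" */
    }
    return (int64_t)1 << shift;     /* kernel-managed HTAB of 2^shift bytes */
}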
2247
a1e98583
DG
2248static inline uint32_t mfpvr(void)
2249{
2250 uint32_t pvr;
2251
2252 asm ("mfpvr %0"
2253 : "=r"(pvr));
2254 return pvr;
2255}
2256
a7342588
DG
2257static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2258{
2259 if (on) {
2260 *word |= flags;
2261 } else {
2262 *word &= ~flags;
2263 }
2264}
2265
2985b86b 2266static void kvmppc_host_cpu_initfn(Object *obj)
a1e98583 2267{
2985b86b
AF
2268 assert(kvm_enabled());
2269}
2270
2271static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2272{
4c315c27 2273 DeviceClass *dc = DEVICE_CLASS(oc);
2985b86b 2274 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
a7342588
DG
2275 uint32_t vmx = kvmppc_get_vmx();
2276 uint32_t dfp = kvmppc_get_dfp();
0cbad81f
DG
2277 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2278 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
a1e98583 2279
cfe34f44 2280 /* Now fix up the class with information we can query from the host */
3bc9ccc0 2281 pcc->pvr = mfpvr();
a7342588 2282
70bca53f
AG
2283 if (vmx != -1) {
2284 /* Only override when we know what the host supports */
cfe34f44
AF
2285 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2286 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
70bca53f
AG
2287 }
2288 if (dfp != -1) {
2289 /* Only override when we know what the host supports */
cfe34f44 2290 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
70bca53f 2291 }
0cbad81f
DG
2292
2293 if (dcache_size != -1) {
2294 pcc->l1_dcache_size = dcache_size;
2295 }
2296
2297 if (icache_size != -1) {
2298 pcc->l1_icache_size = icache_size;
2299 }
4c315c27
MA
2300
2301 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2302 dc->cannot_destroy_with_object_finalize_yet = true;
a1e98583
DG
2303}
2304
3b961124
SY
2305bool kvmppc_has_cap_epr(void)
2306{
2307 return cap_epr;
2308}
2309
7c43bca0
AK
2310bool kvmppc_has_cap_htab_fd(void)
2311{
2312 return cap_htab_fd;
2313}
2314
87a91de6
AG
2315bool kvmppc_has_cap_fixup_hcalls(void)
2316{
2317 return cap_fixup_hcalls;
2318}
2319
5b79b1ca
AK
2320static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2321{
2322 ObjectClass *oc = OBJECT_CLASS(pcc);
2323
2324 while (oc && !object_class_is_abstract(oc)) {
2325 oc = object_class_get_parent(oc);
2326 }
2327 assert(oc);
2328
2329 return POWERPC_CPU_CLASS(oc);
2330}
2331
5ba4576b
AF
2332static int kvm_ppc_register_host_cpu_type(void)
2333{
2334 TypeInfo type_info = {
2335 .name = TYPE_HOST_POWERPC_CPU,
2336 .instance_init = kvmppc_host_cpu_initfn,
2337 .class_init = kvmppc_host_cpu_class_init,
2338 };
2339 uint32_t host_pvr = mfpvr();
2340 PowerPCCPUClass *pvr_pcc;
5b79b1ca 2341 DeviceClass *dc;
5ba4576b
AF
2342
2343 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
3bc9ccc0
AK
2344 if (pvr_pcc == NULL) {
2345 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2346 }
5ba4576b
AF
2347 if (pvr_pcc == NULL) {
2348 return -1;
2349 }
2350 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2351 type_register(&type_info);
5b79b1ca
AK
2352
2353 /* Register generic family CPU class for a family */
2354 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2355 dc = DEVICE_CLASS(pvr_pcc);
2356 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2357 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2358 type_register(&type_info);
2359
5ba4576b
AF
2360 return 0;
2361}
2362
feaa64c4
DG
2363int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2364{
2365 struct kvm_rtas_token_args args = {
2366 .token = token,
2367 };
2368
2369 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2370 return -ENOENT;
2371 }
2372
2373 strncpy(args.name, function, sizeof(args.name));
2374
2375 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2376}
12b1143b 2377
e68cb8b4
AK
2378int kvmppc_get_htab_fd(bool write)
2379{
2380 struct kvm_get_htab_fd s = {
2381 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2382 .start_index = 0,
2383 };
2384
2385 if (!cap_htab_fd) {
2386 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2387 return -1;
2388 }
2389
2390 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2391}
2392
2393int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2394{
bc72ad67 2395 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
e68cb8b4
AK
2396 uint8_t buf[bufsize];
2397 ssize_t rc;
2398
2399 do {
2400 rc = read(fd, buf, bufsize);
2401 if (rc < 0) {
2402 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2403 strerror(errno));
2404 return rc;
2405 } else if (rc) {
e094c4c1
CLG
2406 uint8_t *buffer = buf;
2407 ssize_t n = rc;
2408 while (n) {
2409 struct kvm_get_htab_header *head =
2410 (struct kvm_get_htab_header *) buffer;
2411 size_t chunksize = sizeof(*head) +
2412 HASH_PTE_SIZE_64 * head->n_valid;
2413
2414 qemu_put_be32(f, head->index);
2415 qemu_put_be16(f, head->n_valid);
2416 qemu_put_be16(f, head->n_invalid);
2417 qemu_put_buffer(f, (void *)(head + 1),
2418 HASH_PTE_SIZE_64 * head->n_valid);
2419
2420 buffer += chunksize;
2421 n -= chunksize;
2422 }
e68cb8b4
AK
2423 }
2424 } while ((rc != 0)
2425 && ((max_ns < 0)
bc72ad67 2426 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
e68cb8b4
AK
2427
2428 return (rc == 0) ? 1 : 0;
2429}
2430
2431int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2432 uint16_t n_valid, uint16_t n_invalid)
2433{
2434 struct kvm_get_htab_header *buf;
2435 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2436 ssize_t rc;
2437
2438 buf = alloca(chunksize);
e68cb8b4
AK
2439 buf->index = index;
2440 buf->n_valid = n_valid;
2441 buf->n_invalid = n_invalid;
2442
2443 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2444
2445 rc = write(fd, buf, chunksize);
2446 if (rc < 0) {
2447 fprintf(stderr, "Error writing KVM hash table: %s\n",
2448 strerror(errno));
2449 return rc;
2450 }
2451 if (rc != chunksize) {
2452 /* We should never get a short write on a single chunk */
2453 fprintf(stderr, "Short write, restoring KVM hash table\n");
2454 return -1;
2455 }
2456 return 0;
2457}
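/*
 * For reference, not part of the original file: the migration stream
 * written by kvmppc_save_htab() and consumed one chunk at a time by
 * kvmppc_load_htab_chunk() is a sequence of records of the form
 *
 *     be32  index       first HPTE index covered by this record
 *     be16  n_valid     number of valid HPTEs that follow
 *     be16  n_invalid   number of HPTEs to invalidate after them
 *     n_valid * HASH_PTE_SIZE_64 bytes of raw HPTE contents
 *
 * mirroring struct kvm_get_htab_header as read from the kernel's HTAB fd.
 */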
2458
20d695a9 2459bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
4513d923
GN
2460{
2461 return true;
2462}
a1b87fe0 2463
20d695a9 2464int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
a1b87fe0
JK
2465{
2466 return 1;
2467}
2468
2469int kvm_arch_on_sigbus(int code, void *addr)
2470{
2471 return 1;
2472}
82169660
SW
2473
2474void kvm_arch_init_irq_routing(KVMState *s)
2475{
2476}
c65f9a07 2477
7c43bca0
AK
2478struct kvm_get_htab_buf {
2479 struct kvm_get_htab_header header;
2480 /*
2481      * We require one extra target_ulong of space for the read
2482 */
2483 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2484};
2485
2486uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2487{
2488 int htab_fd;
2489 struct kvm_get_htab_fd ghf;
2490 struct kvm_get_htab_buf *hpte_buf;
2491
2492 ghf.flags = 0;
2493 ghf.start_index = pte_index;
2494 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2495 if (htab_fd < 0) {
2496 goto error_out;
2497 }
2498
2499 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2500 /*
2501 * Read the hpte group
2502 */
2503 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2504 goto out_close;
2505 }
2506
2507 close(htab_fd);
2508 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2509
2510out_close:
2511 g_free(hpte_buf);
2512 close(htab_fd);
2513error_out:
2514 return 0;
2515}
2516
2517void kvmppc_hash64_free_pteg(uint64_t token)
2518{
2519 struct kvm_get_htab_buf *htab_buf;
2520
2521 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2522 hpte);
2523 g_free(htab_buf);
2524 return;
2525}
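/*
 * For reference, not part of the original file: the token returned by
 * kvmppc_hash64_read_pteg() is the address of the hpte[] array inside a
 * heap-allocated struct kvm_get_htab_buf.  kvmppc_hash64_free_pteg()
 * recovers the containing buffer with container_of() and g_free()s it, so
 * every successful read must eventually be paired with a free.
 */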
c1385933
AK
2526
2527void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2528 target_ulong pte0, target_ulong pte1)
2529{
2530 int htab_fd;
2531 struct kvm_get_htab_fd ghf;
2532 struct kvm_get_htab_buf hpte_buf;
2533
2534 ghf.flags = 0;
2535 ghf.start_index = 0; /* Ignored */
2536 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2537 if (htab_fd < 0) {
2538 goto error_out;
2539 }
2540
2541 hpte_buf.header.n_valid = 1;
2542 hpte_buf.header.n_invalid = 0;
2543 hpte_buf.header.index = pte_index;
2544 hpte_buf.hpte[0] = pte0;
2545 hpte_buf.hpte[1] = pte1;
2546 /*
2547 * Write the hpte entry.
2548      * CAUTION: write() has the warn_unused_result attribute.  Hence we
2549      * need to check the return value, even though we don't act on it.
2550 */
2551 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2552 goto out_close;
2553 }
2554
2555out_close:
2556 close(htab_fd);
2557 return;
2558
2559error_out:
2560 return;
2561}
9e03a040
FB
2562
2563int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
dc9f06ca 2564 uint64_t address, uint32_t data, PCIDevice *dev)
9e03a040
FB
2565{
2566 return 0;
2567}
1850b6b7
EA
2568
2569int kvm_arch_msi_data_to_gsi(uint32_t data)
2570{
2571 return data & 0xffff;
2572}
4d9392be
TH
2573
2574int kvmppc_enable_hwrng(void)
2575{
2576 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2577 return -1;
2578 }
2579
2580 return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2581}