/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <dirent.h>
#include <sys/ioctl.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/error-report.h"
#include "cpu.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/hw_accel.h"
#include "sysemu/numa.h"
#include "kvm_ppc.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/ppc/spapr_cpu_core.h"
#include "hw/ppc/ppc.h"
#include "sysemu/watchdog.h"
#include "trace.h"
#include "exec/gdbstub.h"
#include "exec/memattrs.h"
#include "sysemu/hostmem.h"
#include "qemu/cutils.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;
static int cap_htm;             /* Hardware transactional memory support */

static uint32_t debug_inst_opcode;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}

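/*
 * Note: idle_timer is (re-)armed in kvm_arch_pre_run() with a
 * NANOSECONDS_PER_SECOND / 50 (20 ms) deadline each time an interrupt is
 * injected without KVM_CAP_PPC_IRQ_LEVEL, so kvm_kick_cpu() keeps firing
 * until the guest has really taken the interrupt.
 */
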
/* Check whether we are running with KVM-PR (instead of KVM-HV). This
 * should only be used for fallback tests - generally we should use
 * explicit capabilities for the features we want, rather than
 * assuming what is/isn't available depending on the KVM variant. */
static bool kvmppc_is_pr(KVMState *ks)
{
    /* Assume KVM-PR if the GET_PVINFO capability is available */
    return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
}

static int kvm_ppc_register_host_cpu_type(void);

int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = false;
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}

int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
{
    return 0;
}

static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users that they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}

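/*
 * With KVM_CAP_SW_TLB enabled, env->tlb.tlbm is shared directly with the
 * kernel: QEMU marks its copy stale via env->tlb_dirty and pushes a dirty
 * bitmap to KVM through KVM_DIRTY_TLB (see kvm_sw_tlb_put() below) instead
 * of transferring individual TLB entries.
 */
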

#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - Check whether we are running "PR" KVM which only supports 4K
     *   and 16M pages, but supports them regardless of the backing
     *   store characteristics. We also don't support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvmppc_is_pr(cs->kvm_state)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06 ||
            env->mmu_model == POWERPC_MMU_2_07) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 and later */
        if (env->mmu_model == POWERPC_MMU_2_06 ||
            env->mmu_model == POWERPC_MMU_2_07) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}

static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}

static long gethugepagesize(const char *mem_path)
{
    struct statfs fs;
    int ret;

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}

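/*
 * Worked example: for a path on a 16 MiB hugetlbfs mount, statfs() reports
 * f_type == HUGETLBFS_MAGIC and f_bsize == 0x1000000, so this returns
 * 16 MiB. For a path on an ordinary filesystem (e.g. tmpfs) it falls back
 * to the base page size from getpagesize().
 */
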
/*
 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
 * may or may not name the same files / on the same filesystem now as
 * when we actually open and map them. Iterate over the file
 * descriptors instead, and use qemu_fd_getpagesize().
 */
static int find_max_supported_pagesize(Object *obj, void *opaque)
{
    char *mem_path;
    long *hpsize_min = opaque;

    if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
        mem_path = object_property_get_str(obj, "mem-path", NULL);
        if (mem_path) {
            long hpsize = gethugepagesize(mem_path);
            if (hpsize < *hpsize_min) {
                *hpsize_min = hpsize;
            }
        } else {
            *hpsize_min = getpagesize();
        }
    }

    return 0;
}

static long getrampagesize(void)
{
    long hpsize = LONG_MAX;
    long mainrampagesize;
    Object *memdev_root;

    if (mem_path) {
        mainrampagesize = gethugepagesize(mem_path);
    } else {
        mainrampagesize = getpagesize();
    }

    /* it's possible we have memory-backend objects with
     * hugepage-backed RAM. these may get mapped into system
     * address space via -numa parameters or memory hotplug
     * hooks. we want to take these into account, but we
     * also want to make sure these supported hugepage
     * sizes are applicable across the entire range of memory
     * we may boot from, so we take the min across all
     * backends, and assume normal pages in cases where a
     * backend isn't backed by hugepages.
     */
    memdev_root = object_resolve_path("/objects", NULL);
    if (memdev_root) {
        object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
    }
    if (hpsize == LONG_MAX) {
        /* No additional memory regions found ==> Report main RAM page size */
        return mainrampagesize;
    }

    /* If NUMA is disabled or the NUMA nodes are not backed with a
     * memory-backend, then there is at least one node using "normal" RAM,
     * so if its page size is smaller we have got to report that size instead.
     */
    if (hpsize > mainrampagesize &&
        (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
        static bool warned;
        if (!warned) {
            error_report("Huge page support disabled (n/a for main memory).");
            warned = true;
        }
        return mainrampagesize;
    }

    return hpsize;
}

static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}

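/*
 * Example: with KVM_PPC_PAGE_SIZES_REAL set (HV KVM) and RAM backed by
 * 64 KiB pages (rampgsize == 0x10000), a 16 MiB page size (shift == 24) is
 * rejected because 1ul << 24 > 0x10000, while 4 KiB and 64 KiB sizes pass.
 */
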
static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int iq, ik, jq, jk;
    bool has_64k_pages = false;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    /* If we have HV KVM, we need to forbid CI large pages if our
     * host page size is smaller than 64K.
     */
    if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
        env->ci_large_pages = getpagesize() >= 0x10000;
    }

    /*
     * XXX This loop should be an entry wide AND of the capabilities that
     *     the selected CPU has with the capabilities that KVM supports.
     */
    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            if (ksps->enc[jk].page_shift == 16) {
                has_64k_pages = true;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
    if (!has_64k_pages) {
        env->mmu_model &= ~POWERPC_MMU_64K;
    }
}
#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

#endif /* !defined (TARGET_PPC64) */

unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
}

/* e500 supports 2 h/w breakpoints and 2 watchpoints.
 * book3s supports only 1 watchpoint, so an array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/* By default no breakpoints or watchpoints are supported */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;

static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
{
    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        max_hw_breakpoint = 2;
        max_hw_watchpoint = 2;
    }

    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
        fprintf(stderr, "Error initializing h/w breakpoints\n");
        return;
    }
}

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        if (ret == -EINVAL) {
            error_report("Register sync failed... If you're using kvm-hv.ko,"
                         " only \"-cpu host\" is possible");
        }
        return ret;
    }

    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        /* This target supports access to KVM's guest TLB */
        ret = kvm_booke206_tlb_init(cpu);
        break;
    case POWERPC_MMU_2_07:
        if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
            /* KVM-HV has transactional memory on POWER8 also without the
             * KVM_CAP_PPC_HTM extension, so enable it here instead. */
            cap_htm = true;
        }
        break;
    default:
        break;
    }

    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
    kvmppc_hw_debug_points_init(cenv);

    return ret;
}

static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}

static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_get(spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}

static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}

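/*
 * The ONE_REG id encodes the register's width: masking with
 * KVM_REG_SIZE_MASK yields KVM_REG_SIZE_U32 or KVM_REG_SIZE_U64, which is
 * how the helpers above pick the right member of the val union. E.g.
 * KVM_REG_PPC_HIOR is a 64-bit id, so kvm_put_one_spr() copies the SPR
 * through val.u64.
 */
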
static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

#ifdef HOST_WORDS_BIGENDIAN
            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
#else
            vsr[0] = env->vsr[i];
            vsr[1] = float64_val(env->fpr[i]);
#endif
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

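/*
 * Layout note: each VSX register is transferred as two 64-bit doublewords,
 * with vsr[0]/vsr[1] ordered by host endianness (see the
 * HOST_WORDS_BIGENDIAN blocks above and in kvm_get_fp()). The FPR value
 * lives in the most-significant doubleword of the corresponding VSR, and
 * env->vsr[i] holds the other half.
 */
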
static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
#ifdef HOST_WORDS_BIGENDIAN
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
#else
                env->fpr[i] = vsr[1];
                if (vsx) {
                    env->vsr[i] = vsr[0];
                }
#endif
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

#if defined(TARGET_PPC64)
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}

static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered. That means when restoring state, if a VPA *is*
     * registered, we need to set that up first. If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */

int kvmppc_put_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int i;

    sregs.pvr = env->spr[SPR_PVR];

    sregs.u.s.sdr1 = env->spr[SPR_SDR1];

    /* Sync SLB */
#ifdef TARGET_PPC64
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
        if (env->slb[i].esid & SLB_ESID_V) {
            sregs.u.s.ppc64.slb[i].slbe |= i;
        }
        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        sregs.u.s.ppc32.sr[i] = env->sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        /* Beware. We have to swap upper and lower bits here */
        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
            | env->DBAT[1][i];
        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
            | env->IBAT[1][i];
    }

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
}

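/*
 * BAT packing example: QEMU keeps each BAT pair as two 32-bit halves,
 * DBAT[0][i] (upper register) and DBAT[1][i] (lower register), while the
 * kvm_sregs ABI wants one 64-bit word per BAT with the upper register in
 * the high 32 bits. Hence dbat[i] == ((uint64_t)DBAT[0][i] << 32) |
 * DBAT[1][i], and kvmppc_get_books_sregs() splits it back the same way.
 */
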
int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        ret = kvmppc_put_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (msr_ts) {
            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            }
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            }
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
        }

        if (cap_papr) {
            if (kvm_put_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to set VPA information to KVM\n");
            }
        }

        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif /* TARGET_PPC64 */
    }

    return ret;
}

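/*
 * CR packing example: the kvm_regs ABI carries the condition register as a
 * single 32-bit word, while QEMU splits it into eight 4-bit fields. The
 * loop above places env->crf[0] in bits 31:28 and env->crf[7] in bits 3:0;
 * kvm_arch_get_registers() reverses this by shifting the word right four
 * bits at a time.
 */
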
static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
{
    env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
}

static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
        env->spr[SPR_DECR] = sregs.u.e.dec;
        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
    }

    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_64) {
        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
    }

    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
        kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
        kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
        kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
        kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
        kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
        kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
        kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
        kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
        kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
        kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
        kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
        kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);

        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
            kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
            kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
        }

        if (sregs.u.e.features & KVM_SREGS_E_PM) {
            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
        }

        if (sregs.u.e.features & KVM_SREGS_E_PC) {
            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
            kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
        }
    }

    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
    }

    if (sregs.u.e.features & KVM_SREGS_EXP) {
        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_PD) {
        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
    }

    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
        }
    }

    return 0;
}

static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    if (!env->external_htab) {
        ppc_store_sdr1(env, sregs.u.s.sdr1);
    }

    /* Sync SLB */
#ifdef TARGET_PPC64
    /*
     * The packed SLB array we get from KVM_GET_SREGS only contains
     * information about valid entries. So we flush our internal copy
     * to get rid of stale ones, then put all valid SLB entries back
     * in.
     */
    memset(env->slb, 0, sizeof(env->slb));
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
        /*
         * Only restore valid entries
         */
        if (rb & SLB_ESID_V) {
            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
        }
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        env->sr[i] = sregs.u.s.ppc32.sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
    }

    return 0;
}

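/*
 * SLB restore note: on the put side kvmppc_put_books_sregs() ORs the slot
 * index into the low bits of slbe for valid entries, so here rb & 0xfff
 * recovers the slot number and rb & ~0xfffULL the ESID portion passed to
 * ppc_store_slb().
 */
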
int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    kvm_get_fp(cs);

    if (cap_booke_sregs) {
        ret = kvmppc_get_booke_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_segstate) {
        ret = kvmppc_get_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_get_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (msr_ts) {
            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            }
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            }
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
        }

        if (cap_papr) {
            if (kvm_get_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to get VPA information from KVM\n");
            }
        }

        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif
    }

    return 0;
}

int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}

#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    qemu_mutex_lock_iothread();

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1 << PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        DPRINTF("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                  (NANOSECONDS_PER_SECOND / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */

    qemu_mutex_unlock_iothread();
}

MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
{
    return MEMTXATTRS_UNSPECIFIED;
}

int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}

static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        cs->exception_index = EXCP_HLT;
    }

    return 0;
}

/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    /* Mixed endian case is not handled */
    uint32_t sc = debug_inst_opcode;

    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
                            sizeof(sc), 0) ||
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
        return -EINVAL;
    }

    return 0;
}

int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    uint32_t sc;

    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
        sc != debug_inst_opcode ||
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
                            sizeof(sc), 1)) {
        return -EINVAL;
    }

    return 0;
}

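/*
 * Software breakpoint scheme: insertion saves the original instruction in
 * bp->saved_insn and patches in debug_inst_opcode (obtained from KVM via
 * KVM_REG_PPC_DEBUG_INST in kvm_arch_init_vcpu()); removal verifies that
 * opcode is still in place before restoring the saved instruction, failing
 * with -EINVAL if the guest has overwritten it.
 */
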
static int find_hw_breakpoint(target_ulong addr, int type)
{
    int n;

    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));

    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
        if (hw_debug_points[n].addr == addr &&
            hw_debug_points[n].type == type) {
            return n;
        }
    }

    return -1;
}

static int find_hw_watchpoint(target_ulong addr, int *flag)
{
    int n;

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
    if (n >= 0) {
        *flag = BP_MEM_ACCESS;
        return n;
    }

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
    if (n >= 0) {
        *flag = BP_MEM_WRITE;
        return n;
    }

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
    if (n >= 0) {
        *flag = BP_MEM_READ;
        return n;
    }

    return -1;
}

int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
        return -ENOBUFS;
    }

    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;

    switch (type) {
    case GDB_BREAKPOINT_HW:
        if (nb_hw_breakpoint >= max_hw_breakpoint) {
            return -ENOBUFS;
        }

        if (find_hw_breakpoint(addr, type) >= 0) {
            return -EEXIST;
        }

        nb_hw_breakpoint++;
        break;

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        if (nb_hw_watchpoint >= max_hw_watchpoint) {
            return -ENOBUFS;
        }

        if (find_hw_breakpoint(addr, type) >= 0) {
            return -EEXIST;
        }

        nb_hw_watchpoint++;
        break;

    default:
        return -ENOSYS;
    }

    return 0;
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    int n;

    n = find_hw_breakpoint(addr, type);
    if (n < 0) {
        return -ENOENT;
    }

    switch (type) {
    case GDB_BREAKPOINT_HW:
        nb_hw_breakpoint--;
        break;

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        nb_hw_watchpoint--;
        break;

    default:
        return -ENOSYS;
    }
    /* Keep the array packed: move the last entry into the freed slot */
    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];

    return 0;
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
    nb_hw_breakpoint = nb_hw_watchpoint = 0;
}

void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
{
    int n;

    /* Software Breakpoint updates */
    if (kvm_sw_breakpoints_active(cs)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }

    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));
    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));

    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
            switch (hw_debug_points[n].type) {
            case GDB_BREAKPOINT_HW:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
                break;
            case GDB_WATCHPOINT_WRITE:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
                break;
            case GDB_WATCHPOINT_READ:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
                break;
            case GDB_WATCHPOINT_ACCESS:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
                                       KVMPPC_DEBUG_WATCH_READ;
                break;
            default:
                cpu_abort(cs, "Unsupported breakpoint type\n");
            }
            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
        }
    }
}

static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
    int handle = 0;
    int n;
    int flag = 0;

    if (cs->singlestep_enabled) {
        handle = 1;
    } else if (arch_info->status) {
        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
                if (n >= 0) {
                    handle = 1;
                }
            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
                                            KVMPPC_DEBUG_WATCH_WRITE)) {
                n = find_hw_watchpoint(arch_info->address, &flag);
                if (n >= 0) {
                    handle = 1;
                    cs->watchpoint_hit = &hw_watchpoint;
                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
                    hw_watchpoint.flags = flag;
                }
            }
        }
    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
        handle = 1;
    } else {
        /* QEMU is not able to handle this debug exception, so inject a
         * program exception into the guest;
         * yes, a program exception, NOT a debug exception !!
         * When QEMU is using debug resources then the debug exception must
         * always be set. To achieve this we set MSR_DE and also set
         * MSRP_DEP so the guest cannot change MSR_DE.
         * When emulating debug resources for the guest we want the guest
         * to control MSR_DE (enable/disable the debug interrupt on need).
         * Supporting both configurations at once is NOT possible.
         * So the result is that we cannot share debug resources
         * between QEMU and the guest on BOOKE architecture.
         * In the current design QEMU gets priority over the guest;
         * this means that if QEMU is using debug resources then the guest
         * cannot use them.
         * For software breakpoints QEMU uses a privileged instruction,
         * so there is no way we can be here because the guest set up a
         * debug exception: the only possibility is that the guest executed
         * a privileged / illegal instruction, and that's why we are
         * injecting a program interrupt.
         */

        cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
         */
        env->nip += 4;
        cs->exception_index = POWERPC_EXCP_PROGRAM;
        env->error_code = POWERPC_EXCP_INVAL;
        ppc_cpu_do_interrupt(cs);
    }

    return handle;
}

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    qemu_mutex_lock_iothread();

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    case KVM_EXIT_DEBUG:
        DPRINTF("handle debug exception\n");
        if (kvm_handle_debug(cpu, run)) {
            ret = EXCP_DEBUG;
            break;
        }
        /* re-enter, this exception was guest-internal */
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    qemu_mutex_unlock_iothread();
    return ret;
}

31f2cb8f
BB
1799int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1800{
1801 CPUState *cs = CPU(cpu);
1802 uint32_t bits = tsr_bits;
1803 struct kvm_one_reg reg = {
1804 .id = KVM_REG_PPC_OR_TSR,
1805 .addr = (uintptr_t) &bits,
1806 };
1807
1808 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1809}
1810
1811int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1812{
1813
1814 CPUState *cs = CPU(cpu);
1815 uint32_t bits = tsr_bits;
1816 struct kvm_one_reg reg = {
1817 .id = KVM_REG_PPC_CLEAR_TSR,
1818 .addr = (uintptr_t) &bits,
1819 };
1820
1821 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1822}
1823
1824int kvmppc_set_tcr(PowerPCCPU *cpu)
1825{
1826 CPUState *cs = CPU(cpu);
1827 CPUPPCState *env = &cpu->env;
1828 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1829
1830 struct kvm_one_reg reg = {
1831 .id = KVM_REG_PPC_TCR,
1832 .addr = (uintptr_t) &tcr,
1833 };
1834
1835 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1836}
1837
1838int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1839{
1840 CPUState *cs = CPU(cpu);
31f2cb8f
BB
1841 int ret;
1842
1843 if (!kvm_enabled()) {
1844 return -1;
1845 }
1846
1847 if (!cap_ppc_watchdog) {
1848 printf("warning: KVM does not support watchdog");
1849 return -1;
1850 }
1851
48add816 1852 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
31f2cb8f
BB
1853 if (ret < 0) {
1854 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1855 __func__, strerror(-ret));
1856 return ret;
1857 }
1858
1859 return ret;
1860}
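/*
 * Illustrative sketch, not part of the original file: how the BookE
 * watchdog helpers above combine.  The TSR bit values are the
 * architectural ENW/WIS bits, written as raw masks because symbolic
 * names for them are an assumption of this example.
 */
static void example_booke_watchdog_setup(PowerPCCPU *cpu)
{
    if (kvmppc_booke_watchdog_enable(cpu) < 0) {
        return; /* no KVM, or no watchdog support */
    }
    /* Propagate the guest-visible timer control register to KVM */
    kvmppc_set_tcr(cpu);
    /* Acknowledge a previous expiry: clear TSR[ENW] and TSR[WIS] */
    kvmppc_clear_tsr_bits(cpu, (1U << 31) | (1U << 30));
}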
1861
dc333cd6
AG
1862static int read_cpuinfo(const char *field, char *value, int len)
1863{
1864 FILE *f;
1865 int ret = -1;
1866 int field_len = strlen(field);
1867 char line[512];
1868
1869 f = fopen("/proc/cpuinfo", "r");
1870 if (!f) {
1871 return -1;
1872 }
1873
1874 do {
ef951443 1875 if (!fgets(line, sizeof(line), f)) {
dc333cd6
AG
1876 break;
1877 }
1878 if (!strncmp(line, field, field_len)) {
ae215068 1879 pstrcpy(value, len, line);
dc333cd6
AG
1880 ret = 0;
1881 break;
1882 }
1883 } while (*line);
1884
1885 fclose(f);
1886
1887 return ret;
1888}
1889
1890uint32_t kvmppc_get_tbfreq(void)
1891{
1892 char line[512];
1893 char *ns;
73bcb24d 1894 uint32_t retval = NANOSECONDS_PER_SECOND;
dc333cd6
AG
1895
1896 if (read_cpuinfo("timebase", line, sizeof(line))) {
1897 return retval;
1898 }
1899
1900 if (!(ns = strchr(line, ':'))) {
1901 return retval;
1902 }
1903
1904 ns++;
1905
f9b8e7f6 1906 return atoi(ns);
dc333cd6 1907}
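/*
 * Worked example of the parse above (values for illustration only):
 * a POWER host's /proc/cpuinfo contains a line such as
 *     "timebase        : 512000000"
 * read_cpuinfo() returns the whole matching line, strchr() finds the
 * ':', and atoi() on the remainder yields 512000000.
 */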
4513d923 1908
ef951443
ND
1909bool kvmppc_get_host_serial(char **value)
1910{
1911 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1912 NULL);
1913}
1914
1915bool kvmppc_get_host_model(char **value)
1916{
1917 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1918}
1919
eadaada1
AG
1920/* Try to find a device tree node for a CPU with clock-frequency property */
1921static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1922{
1923 struct dirent *dirp;
1924 DIR *dp;
1925
1926 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1927 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1928 return -1;
1929 }
1930
1931 buf[0] = '\0';
1932 while ((dirp = readdir(dp)) != NULL) {
1933 FILE *f;
1934 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1935 dirp->d_name);
1936 f = fopen(buf, "r");
1937 if (f) {
1938 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1939 fclose(f);
1940 break;
1941 }
1942 buf[0] = '\0';
1943 }
1944 closedir(dp);
1945 if (buf[0] == '\0') {
1946 printf("Unknown host!\n");
1947 return -1;
1948 }
1949
1950 return 0;
1951}
1952
7d94a30b 1953static uint64_t kvmppc_read_int_dt(const char *filename)
eadaada1 1954{
9bc884b7
DG
1955 union {
1956 uint32_t v32;
1957 uint64_t v64;
1958 } u;
eadaada1
AG
1959 FILE *f;
1960 int len;
1961
7d94a30b 1962 f = fopen(filename, "rb");
eadaada1
AG
1963 if (!f) {
1964 return -1;
1965 }
1966
9bc884b7 1967 len = fread(&u, 1, sizeof(u), f);
eadaada1
AG
1968 fclose(f);
1969 switch (len) {
9bc884b7
DG
1970 case 4:
1971 /* property is a 32-bit quantity */
1972 return be32_to_cpu(u.v32);
1973 case 8:
1974 return be64_to_cpu(u.v64);
eadaada1
AG
1975 }
1976
1977 return 0;
1978}
1979
7d94a30b
SB
1980/* Read a CPU node property from the host device tree that's a single
1981 * integer (32-bit or 64-bit). Returns -1 if the property can't be
1982 * found or opened, and 0 if the property's format isn't
1983 * understood. */
1984static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1985{
1986 char buf[PATH_MAX], *tmp;
1987 uint64_t val;
1988
1989 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1990 return -1;
1991 }
1992
1993 tmp = g_strdup_printf("%s/%s", buf, propname);
1994 val = kvmppc_read_int_dt(tmp);
1995 g_free(tmp);
1996
1997 return val;
1998}
1999
9bc884b7
DG
2000uint64_t kvmppc_get_clockfreq(void)
2001{
2002 return kvmppc_read_int_cpu_dt("clock-frequency");
2003}
2004
6659394f
DG
2005uint32_t kvmppc_get_vmx(void)
2006{
2007 return kvmppc_read_int_cpu_dt("ibm,vmx");
2008}
2009
2010uint32_t kvmppc_get_dfp(void)
2011{
2012 return kvmppc_read_int_cpu_dt("ibm,dfp");
2013}
2014
1a61a9ae
SY
2015static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2016{
2017 PowerPCCPU *cpu = ppc_env_get_cpu(env);
2018 CPUState *cs = CPU(cpu);
2019
6fd33a75 2020 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1a61a9ae
SY
2021 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2022 return 0;
2023 }
2024
2025 return 1;
2026}
2027
2028int kvmppc_get_hasidle(CPUPPCState *env)
2029{
2030 struct kvm_ppc_pvinfo pvinfo;
2031
2032 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2033 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2034 return 1;
2035 }
2036
2037 return 0;
2038}
2039
1328c2bf 2040int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
45024f09
AG
2041{
2042 uint32_t *hc = (uint32_t*)buf;
45024f09
AG
2043 struct kvm_ppc_pvinfo pvinfo;
2044
1a61a9ae 2045 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
45024f09 2046 memcpy(buf, pvinfo.hcall, buf_len);
45024f09
AG
2047 return 0;
2048 }
45024f09
AG
2049
2050 /*
d13fc32e 2051 * Fallback to always fail hypercalls regardless of endianness:
45024f09 2052 *
d13fc32e 2053 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
45024f09 2054 * li r3, -1
d13fc32e
AG
2055 * b .+8 (becomes nop in wrong endian)
2056 * bswap32(li r3, -1)
45024f09
AG
2057 */
2058
d13fc32e
AG
2059 hc[0] = cpu_to_be32(0x08000048);
2060 hc[1] = cpu_to_be32(0x3860ffff);
2061 hc[2] = cpu_to_be32(0x48000008);
2062 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
45024f09 2063
0ddbd053 2064 return 1;
45024f09
AG
2065}
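/*
 * Worked decode of the fallback blob above (illustration).  A guest
 * whose endianness matches the blob sees:
 *
 *   0x08000048   tdi 0,r0,72   TO=0 never traps, i.e. a nop
 *   0x3860ffff   li r3,-1      load the failure return value
 *   0x48000008   b .+8         skip the byte-swapped word below
 *   0xffff6038   (skipped)
 *
 * A guest of the opposite endianness byte-swaps every word and sees:
 *
 *   0x48000008   b .+8         skip the next (now garbled) word
 *   (skipped)
 *   0x08000048   tdi 0,r0,72   nop
 *   0x3860ffff   li r3,-1      load the failure return value
 *
 * Either way the pseudo-hypercall leaves -1 in r3.
 */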
2066
026bfd89
DG
2067static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2068{
2069 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2070}
2071
2072void kvmppc_enable_logical_ci_hcalls(void)
2073{
2074 /*
2075 * FIXME: it would be nice if we could detect the cases where
2076 * we're using a device which requires the in-kernel
2077 * implementation of these hcalls but the kernel lacks it, and
2078 * produce a warning.
2079 */
2080 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2081 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2082}
2083
ef9971dd
AK
2084void kvmppc_enable_set_mode_hcall(void)
2085{
2086 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2087}
2088
5145ad4f
NW
2089void kvmppc_enable_clear_ref_mod_hcalls(void)
2090{
2091 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2092 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2093}
2094
1bc22652 2095void kvmppc_set_papr(PowerPCCPU *cpu)
f61b4bed 2096{
1bc22652 2097 CPUState *cs = CPU(cpu);
f61b4bed
AG
2098 int ret;
2099
48add816 2100 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
f61b4bed 2101 if (ret) {
072ed5f2
TH
2102 error_report("This vCPU type or KVM version does not support PAPR");
2103 exit(1);
94135e81 2104 }
9b00ea49
DG
2105
2106 /* Update the capability flag so we sync the right information
2107 * with kvm */
2108 cap_papr = 1;
f61b4bed
AG
2109}
2110
d6e166c0 2111int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
6db5bb0f 2112{
d6e166c0 2113 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
6db5bb0f
AK
2114}
2115
5b95b8b9
AG
2116void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2117{
5b95b8b9 2118 CPUState *cs = CPU(cpu);
5b95b8b9
AG
2119 int ret;
2120
48add816 2121 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
5b95b8b9 2122 if (ret && mpic_proxy) {
072ed5f2
TH
2123 error_report("This KVM version does not support EPR");
2124 exit(1);
5b95b8b9
AG
2125 }
2126}
2127
e97c3636
DG
2128int kvmppc_smt_threads(void)
2129{
2130 return cap_ppc_smt ? cap_ppc_smt : 1;
2131}
2132
7f763a5d 2133#ifdef TARGET_PPC64
658fa66b 2134off_t kvmppc_alloc_rma(void **rma)
354ac20a 2135{
354ac20a
DG
2136 off_t size;
2137 int fd;
2138 struct kvm_allocate_rma ret;
354ac20a
DG
2139
2140 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported;
2141 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2142 * not necessary on this hardware;
2143 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware.
2144 *
2145 * FIXME: We should allow the user to force contiguous RMA
2146 * allocation in the cap_ppc_rma==1 case.
2147 */
2148 if (cap_ppc_rma < 2) {
2149 return 0;
2150 }
2151
2152 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2153 if (fd < 0) {
2154 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2155 strerror(errno));
2156 return -1;
2157 }
2158
2159 size = MIN(ret.rma_size, 256ul << 20);
2160
658fa66b
AK
2161 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2162 if (*rma == MAP_FAILED) {
354ac20a
DG
2163 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2164 return -1;
2165 }
2166
354ac20a
DG
2167 return size;
2168}
2169
7f763a5d
DG
2170uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2171{
f36951c1
DG
2172 struct kvm_ppc_smmu_info info;
2173 long rampagesize, best_page_shift;
2174 int i;
2175
7f763a5d
DG
2176 if (cap_ppc_rma >= 2) {
2177 return current_size;
2178 }
f36951c1
DG
2179
2180 /* Find the largest hardware supported page size that's less than
2181 * or equal to the (logical) backing page size of guest RAM */
182735ef 2182 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
f36951c1
DG
2183 rampagesize = getrampagesize();
2184 best_page_shift = 0;
2185
2186 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2187 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2188
2189 if (!sps->page_shift) {
2190 continue;
2191 }
2192
2193 if ((sps->page_shift > best_page_shift)
2194 && ((1UL << sps->page_shift) <= rampagesize)) {
2195 best_page_shift = sps->page_shift;
2196 }
2197 }
2198
7f763a5d 2199 return MIN(current_size,
f36951c1 2200 1ULL << (best_page_shift + hash_shift - 7));
7f763a5d
DG
2201}
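/*
 * A worked instance of the clamp above (the rationale given here is an
 * interpretation, not an upstream comment): an HPT of 2^hash_shift
 * bytes holds 2^(hash_shift - 7) PTEGs, since each 8-entry PTEG is
 * 128 bytes, and capping the RMA at one page per PTEG yields
 * 2^(best_page_shift + hash_shift - 7) bytes.  For example, a 16 MiB
 * HPT (hash_shift = 24) with 64 KiB backing pages (best_page_shift =
 * 16) caps the RMA at 2^33 bytes = 8 GiB.
 */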
2202#endif
2203
da95324e
AK
2204bool kvmppc_spapr_use_multitce(void)
2205{
2206 return cap_spapr_multitce;
2207}
2208
9bb62a07 2209void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
6a81dd17 2210 bool need_vfio)
0f5cb298
DG
2211{
2212 struct kvm_create_spapr_tce args = {
2213 .liobn = liobn,
2214 .window_size = window_size,
2215 };
2216 long len;
2217 int fd;
2218 void *table;
2219
b5aec396
DG
2220 /* Must set fd to -1 so we don't try to munmap when called for
2221 * destroying the table, which the upper layers -will- do
2222 */
2223 *pfd = -1;
6a81dd17 2224 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
0f5cb298
DG
2225 return NULL;
2226 }
2227
2228 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2229 if (fd < 0) {
b5aec396
DG
2230 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2231 liobn);
0f5cb298
DG
2232 return NULL;
2233 }
2234
a83000f5 2235 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
0f5cb298
DG
2236 /* FIXME: round this up to page size */
2237
74b41e56 2238 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
0f5cb298 2239 if (table == MAP_FAILED) {
b5aec396
DG
2240 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2241 liobn);
0f5cb298
DG
2242 close(fd);
2243 return NULL;
2244 }
2245
2246 *pfd = fd;
2247 return table;
2248}
2249
523e7b8a 2250int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
0f5cb298
DG
2251{
2252 long len;
2253
2254 if (fd < 0) {
2255 return -1;
2256 }
2257
523e7b8a 2258 len = nb_table * sizeof(uint64_t);
0f5cb298
DG
2259 if ((munmap(table, len) < 0) ||
2260 (close(fd) < 0)) {
b5aec396
DG
2261 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2262 strerror(errno));
0f5cb298
DG
2263 /* Leak the table */
2264 }
2265
2266 return 0;
2267}
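/*
 * Illustrative pairing of the two helpers above (hypothetical caller,
 * not part of the original file).  For a 1 GiB DMA window with 4 KiB
 * TCE pages, the mapped table is (1 GiB / 4 KiB) * 8 bytes = 2 MiB.
 */
static void example_tce_window(uint32_t liobn)
{
    uint32_t window_size = 0x40000000; /* assumed 1 GiB window */
    uint32_t nb_table = window_size / SPAPR_TCE_PAGE_SIZE;
    int fd;
    void *table = kvmppc_create_spapr_tce(liobn, window_size, &fd, false);

    if (table) {
        /* ... guest programs TCEs via H_PUT_TCE, DMA proceeds ... */
        kvmppc_remove_spapr_tce(table, fd, nb_table);
    }
}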
2268
7f763a5d
DG
2269int kvmppc_reset_htab(int shift_hint)
2270{
2271 uint32_t shift = shift_hint;
2272
ace9a2cb
DG
2273 if (!kvm_enabled()) {
2274 /* Full emulation, tell caller to allocate htab itself */
2275 return 0;
2276 }
2277 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
7f763a5d
DG
2278 int ret;
2279 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
ace9a2cb
DG
2280 if (ret == -ENOTTY) {
2281 /* At least some versions of PR KVM advertise the
2282 * capability, but don't implement the ioctl(). Oops.
2283 * Return 0 so that we allocate the htab in qemu, as is
2284 * correct for PR. */
2285 return 0;
2286 } else if (ret < 0) {
7f763a5d
DG
2287 return ret;
2288 }
2289 return shift;
2290 }
2291
ace9a2cb
DG
2292 /* We have a kernel that predates the htab reset calls. For PR
2293 * KVM, we need to allocate the htab ourselves, for an HV KVM of
96c9cff0
TH
2294 * this era, it has allocated a 16MB fixed size hash table already. */
2295 if (kvmppc_is_pr(kvm_state)) {
ace9a2cb
DG
2296 /* PR - tell caller to allocate htab */
2297 return 0;
2298 } else {
2299 /* HV - assume 16MB kernel allocated htab */
2300 return 24;
2301 }
7f763a5d
DG
2302}
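/*
 * Illustrative sketch (hypothetical caller): interpreting the return
 * value of kvmppc_reset_htab().
 */
static void example_interpret_htab_shift(int shift_hint)
{
    int shift = kvmppc_reset_htab(shift_hint);

    if (shift > 0) {
        /* kernel manages an HPT of 2^shift bytes; QEMU allocates nothing */
    } else if (shift == 0) {
        /* QEMU must allocate and manage the hash table itself */
    } else {
        /* error returned by the KVM_PPC_ALLOCATE_HTAB ioctl */
    }
}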
2303
a1e98583
DG
2304static inline uint32_t mfpvr(void)
2305{
2306 uint32_t pvr;
2307
2308 asm ("mfpvr %0"
2309 : "=r"(pvr));
2310 return pvr;
2311}
2312
a7342588
DG
2313static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2314{
2315 if (on) {
2316 *word |= flags;
2317 } else {
2318 *word &= ~flags;
2319 }
2320}
2321
2985b86b 2322static void kvmppc_host_cpu_initfn(Object *obj)
a1e98583 2323{
2985b86b
AF
2324 assert(kvm_enabled());
2325}
2326
2327static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2328{
4c315c27 2329 DeviceClass *dc = DEVICE_CLASS(oc);
2985b86b 2330 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
a7342588
DG
2331 uint32_t vmx = kvmppc_get_vmx();
2332 uint32_t dfp = kvmppc_get_dfp();
0cbad81f
DG
2333 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2334 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
a1e98583 2335
cfe34f44 2336 /* Now fix up the class with information we can query from the host */
3bc9ccc0 2337 pcc->pvr = mfpvr();
a7342588 2338
70bca53f
AG
2339 if (vmx != -1) {
2340 /* Only override when we know what the host supports */
cfe34f44
AF
2341 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2342 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
70bca53f
AG
2343 }
2344 if (dfp != -1) {
2345 /* Only override when we know what the host supports */
cfe34f44 2346 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
70bca53f 2347 }
0cbad81f
DG
2348
2349 if (dcache_size != -1) {
2350 pcc->l1_dcache_size = dcache_size;
2351 }
2352
2353 if (icache_size != -1) {
2354 pcc->l1_icache_size = icache_size;
2355 }
4c315c27
MA
2356
2357 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2358 dc->cannot_destroy_with_object_finalize_yet = true;
a1e98583
DG
2359}
2360
3b961124
SY
2361bool kvmppc_has_cap_epr(void)
2362{
2363 return cap_epr;
2364}
2365
7c43bca0
AK
2366bool kvmppc_has_cap_htab_fd(void)
2367{
2368 return cap_htab_fd;
2369}
2370
87a91de6
AG
2371bool kvmppc_has_cap_fixup_hcalls(void)
2372{
2373 return cap_fixup_hcalls;
2374}
2375
bac3bf28
TH
2376bool kvmppc_has_cap_htm(void)
2377{
2378 return cap_htm;
2379}
2380
5b79b1ca
AK
2381static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2382{
2383 ObjectClass *oc = OBJECT_CLASS(pcc);
2384
2385 while (oc && !object_class_is_abstract(oc)) {
2386 oc = object_class_get_parent(oc);
2387 }
2388 assert(oc);
2389
2390 return POWERPC_CPU_CLASS(oc);
2391}
2392
52b2519c
TH
2393PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2394{
2395 uint32_t host_pvr = mfpvr();
2396 PowerPCCPUClass *pvr_pcc;
2397
2398 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2399 if (pvr_pcc == NULL) {
2400 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2401 }
2402
2403 return pvr_pcc;
2404}
2405
5ba4576b
AF
2406static int kvm_ppc_register_host_cpu_type(void)
2407{
2408 TypeInfo type_info = {
2409 .name = TYPE_HOST_POWERPC_CPU,
2410 .instance_init = kvmppc_host_cpu_initfn,
2411 .class_init = kvmppc_host_cpu_class_init,
2412 };
5ba4576b 2413 PowerPCCPUClass *pvr_pcc;
5b79b1ca 2414 DeviceClass *dc;
5ba4576b 2415
52b2519c 2416 pvr_pcc = kvm_ppc_get_host_cpu_class();
5ba4576b
AF
2417 if (pvr_pcc == NULL) {
2418 return -1;
2419 }
2420 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2421 type_register(&type_info);
5b79b1ca 2422
9c83fc2e
TH
2423 /* Register a generic CPU class for the CPU family */
2424 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2425 dc = DEVICE_CLASS(pvr_pcc);
2426 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2427 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2428 type_register(&type_info);
2429
3b542549
BR
2430#if defined(TARGET_PPC64)
2431 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2432 type_info.parent = TYPE_SPAPR_CPU_CORE;
7ebaf795
BR
2433 type_info.instance_size = sizeof(sPAPRCPUCore);
2434 type_info.instance_init = NULL;
2435 type_info.class_init = spapr_cpu_core_class_init;
2436 type_info.class_data = (void *) "host";
3b542549
BR
2437 type_register(&type_info);
2438 g_free((void *)type_info.name);
d11b268e
TH
2439
2440 /* Register generic spapr CPU family class for current host CPU type */
2441 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, dc->desc);
7ebaf795 2442 type_info.class_data = (void *) dc->desc;
d11b268e
TH
2443 type_register(&type_info);
2444 g_free((void *)type_info.name);
3b542549
BR
2445#endif
2446
5ba4576b
AF
2447 return 0;
2448}
2449
feaa64c4
DG
2450int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2451{
2452 struct kvm_rtas_token_args args = {
2453 .token = token,
2454 };
2455
2456 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2457 return -ENOENT;
2458 }
2459
2460 strncpy(args.name, function, sizeof(args.name));
2461
2462 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2463}
12b1143b 2464
e68cb8b4
AK
2465int kvmppc_get_htab_fd(bool write)
2466{
2467 struct kvm_get_htab_fd s = {
2468 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2469 .start_index = 0,
2470 };
2471
2472 if (!cap_htab_fd) {
2473 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2474 return -1;
2475 }
2476
2477 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2478}
2479
2480int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2481{
bc72ad67 2482 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
e68cb8b4
AK
2483 uint8_t buf[bufsize];
2484 ssize_t rc;
2485
2486 do {
2487 rc = read(fd, buf, bufsize);
2488 if (rc < 0) {
2489 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2490 strerror(errno));
2491 return rc;
2492 } else if (rc) {
e094c4c1
CLG
2493 uint8_t *buffer = buf;
2494 ssize_t n = rc;
2495 while (n) {
2496 struct kvm_get_htab_header *head =
2497 (struct kvm_get_htab_header *) buffer;
2498 size_t chunksize = sizeof(*head) +
2499 HASH_PTE_SIZE_64 * head->n_valid;
2500
2501 qemu_put_be32(f, head->index);
2502 qemu_put_be16(f, head->n_valid);
2503 qemu_put_be16(f, head->n_invalid);
2504 qemu_put_buffer(f, (void *)(head + 1),
2505 HASH_PTE_SIZE_64 * head->n_valid);
2506
2507 buffer += chunksize;
2508 n -= chunksize;
2509 }
e68cb8b4
AK
2510 }
2511 } while ((rc != 0)
2512 && ((max_ns < 0)
bc72ad67 2513 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
e68cb8b4
AK
2514
2515 return (rc == 0) ? 1 : 0;
2516}
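/*
 * Layout note (derived from the code above): each chunk that
 * kvmppc_save_htab() re-serializes into the migration stream is
 *
 *   u32 index       first HPTE slot described by this chunk
 *   u16 n_valid     number of valid HPTEs that follow
 *   u16 n_invalid   number of slots to invalidate at the destination
 *   n_valid * HASH_PTE_SIZE_64 bytes of raw HPTE data
 *
 * kvmppc_load_htab_chunk() below consumes exactly this layout.
 */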
2517
2518int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2519 uint16_t n_valid, uint16_t n_invalid)
2520{
2521 struct kvm_get_htab_header *buf;
2522 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2523 ssize_t rc;
2524
2525 buf = alloca(chunksize);
e68cb8b4
AK
2526 buf->index = index;
2527 buf->n_valid = n_valid;
2528 buf->n_invalid = n_invalid;
2529
2530 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2531
2532 rc = write(fd, buf, chunksize);
2533 if (rc < 0) {
2534 fprintf(stderr, "Error writing KVM hash table: %s\n",
2535 strerror(errno));
2536 return rc;
2537 }
2538 if (rc != chunksize) {
2539 /* We should never get a short write on a single chunk */
2540 fprintf(stderr, "Short write, restoring KVM hash table\n");
2541 return -1;
2542 }
2543 return 0;
2544}
2545
20d695a9 2546bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
4513d923
GN
2547{
2548 return true;
2549}
a1b87fe0 2550
20d695a9 2551int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
a1b87fe0
JK
2552{
2553 return 1;
2554}
2555
2556int kvm_arch_on_sigbus(int code, void *addr)
2557{
2558 return 1;
2559}
82169660
SW
2560
2561void kvm_arch_init_irq_routing(KVMState *s)
2562{
2563}
c65f9a07 2564
7c43bca0
AK
2565struct kvm_get_htab_buf {
2566 struct kvm_get_htab_header header;
2567 /*
2568 * We require one extra entry for the read
2569 */
2570 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2571};
2572
2573uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2574{
2575 int htab_fd;
2576 struct kvm_get_htab_fd ghf;
2577 struct kvm_get_htab_buf *hpte_buf;
2578
2579 ghf.flags = 0;
2580 ghf.start_index = pte_index;
2581 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2582 if (htab_fd < 0) {
2583 goto error_out;
2584 }
2585
2586 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2587 /*
2588 * Read the hpte group
2589 */
2590 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2591 goto out_close;
2592 }
2593
2594 close(htab_fd);
2595 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2596
2597out_close:
2598 g_free(hpte_buf);
2599 close(htab_fd);
2600error_out:
2601 return 0;
2602}
2603
2604void kvmppc_hash64_free_pteg(uint64_t token)
2605{
2606 struct kvm_get_htab_buf *htab_buf;
2607
2608 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2609 hpte);
2610 g_free(htab_buf);
2611 return;
2612}
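/*
 * Illustrative sketch (hypothetical caller): the token returned by
 * kvmppc_hash64_read_pteg() points into a heap buffer and must be
 * released with kvmppc_hash64_free_pteg() when no longer needed.
 */
static void example_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
{
    uint64_t token = kvmppc_hash64_read_pteg(cpu, pte_index);
    target_ulong *hptes;

    if (!token) {
        return; /* could not open or read the HTAB fd */
    }
    hptes = (target_ulong *)(uintptr_t)token;
    /* hptes[2 * i] / hptes[2 * i + 1] are pte0/pte1 of HPTE i */
    (void)hptes;
    kvmppc_hash64_free_pteg(token);
}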
c1385933
AK
2613
2614void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2615 target_ulong pte0, target_ulong pte1)
2616{
2617 int htab_fd;
2618 struct kvm_get_htab_fd ghf;
2619 struct kvm_get_htab_buf hpte_buf;
2620
2621 ghf.flags = 0;
2622 ghf.start_index = 0; /* Ignored */
2623 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2624 if (htab_fd < 0) {
2625 goto error_out;
2626 }
2627
2628 hpte_buf.header.n_valid = 1;
2629 hpte_buf.header.n_invalid = 0;
2630 hpte_buf.header.index = pte_index;
2631 hpte_buf.hpte[0] = pte0;
2632 hpte_buf.hpte[1] = pte1;
2633 /*
2634 * Write the hpte entry.
2635 * CAUTION: write() has the warn_unused_result attribute. Hence we
2636 * need to check the return value, even though we don't act on it.
2637 */
2638 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2639 goto out_close;
2640 }
2641
2642out_close:
2643 close(htab_fd);
2644 return;
2645
2646error_out:
2647 return;
2648}
9e03a040
FB
2649
2650int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
dc9f06ca 2651 uint64_t address, uint32_t data, PCIDevice *dev)
9e03a040
FB
2652{
2653 return 0;
2654}
1850b6b7 2655
38d87493
PX
2656int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2657 int vector, PCIDevice *dev)
2658{
2659 return 0;
2660}
2661
2662int kvm_arch_release_virq_post(int virq)
2663{
2664 return 0;
2665}
2666
1850b6b7
EA
2667int kvm_arch_msi_data_to_gsi(uint32_t data)
2668{
2669 return data & 0xffff;
2670}
4d9392be
TH
2671
2672int kvmppc_enable_hwrng(void)
2673{
2674 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2675 return -1;
2676 }
2677
2678 return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2679}