1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21
22 #include <linux/kvm.h>
23
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "cpu-models.h"
28 #include "qemu/timer.h"
29 #include "sysemu/sysemu.h"
30 #include "sysemu/hw_accel.h"
31 #include "sysemu/numa.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "sysemu/hostmem.h"
47 #include "qemu/cutils.h"
48 #if defined(TARGET_PPC64)
49 #include "hw/ppc/spapr_cpu_core.h"
50 #endif
51
52 //#define DEBUG_KVM
53
54 #ifdef DEBUG_KVM
55 #define DPRINTF(fmt, ...) \
56 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
57 #else
58 #define DPRINTF(fmt, ...) \
59 do { } while (0)
60 #endif
61
62 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
63
64 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
65 KVM_CAP_LAST_INFO
66 };
67
68 static int cap_interrupt_unset = false;
69 static int cap_interrupt_level = false;
70 static int cap_segstate;
71 static int cap_booke_sregs;
72 static int cap_ppc_smt;
73 static int cap_ppc_rma;
74 static int cap_spapr_tce;
75 static int cap_spapr_multitce;
76 static int cap_spapr_vfio;
77 static int cap_hior;
78 static int cap_one_reg;
79 static int cap_epr;
80 static int cap_ppc_watchdog;
81 static int cap_papr;
82 static int cap_htab_fd;
83 static int cap_fixup_hcalls;
84 static int cap_htm; /* Hardware transactional memory support */
85
86 static uint32_t debug_inst_opcode;
87
88 /* XXX We have a race condition where we actually have a level triggered
89 * interrupt, but the infrastructure can't expose that yet, so the guest
90 * takes but ignores it, goes to sleep and never gets notified that there's
91 * still an interrupt pending.
92 *
93 * As a quick workaround, let's just wake up again 20 ms after we injected
94 * an interrupt. That way we can ensure that we're always reinjecting
95 * interrupts in case the guest swallowed them.
96 */
97 static QEMUTimer *idle_timer;
98
99 static void kvm_kick_cpu(void *opaque)
100 {
101 PowerPCCPU *cpu = opaque;
102
103 qemu_cpu_kick(CPU(cpu));
104 }
105
106 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
107 * should only be used for fallback tests - generally we should use
108 * explicit capabilities for the features we want, rather than
109 * assuming what is/isn't available depending on the KVM variant. */
110 static bool kvmppc_is_pr(KVMState *ks)
111 {
112 /* Assume KVM-PR if the GET_PVINFO capability is available */
113 return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
114 }
115
116 static int kvm_ppc_register_host_cpu_type(void);
117
118 int kvm_arch_init(MachineState *ms, KVMState *s)
119 {
120 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
121 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
122 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
123 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
124 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
125 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
126 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
127 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
128 cap_spapr_vfio = false;
129 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
130 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
131 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
132 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
133 /* Note: we don't set cap_papr here, because this capability is
134 * only activated later, by kvmppc_set_papr() */
135 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
136 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
137 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
138
139 if (!cap_interrupt_level) {
140 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
141 "VM to stall at times!\n");
142 }
143
144 kvm_ppc_register_host_cpu_type();
145
146 return 0;
147 }
148
149 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
150 {
151 return 0;
152 }
153
154 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
155 {
156 CPUPPCState *cenv = &cpu->env;
157 CPUState *cs = CPU(cpu);
158 struct kvm_sregs sregs;
159 int ret;
160
161 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
162 /* What we're really trying to say is "if we're on BookE, we use
163 the native PVR for now". This is the only sane way to check it,
164 though it may mislead users into thinking they can run BookE
165 guests on BookS. Let's hope nobody tries. */
166 return 0;
167 } else {
168 if (!cap_segstate) {
169 fprintf(stderr, "kvm error: missing PVR setting capability\n");
170 return -ENOSYS;
171 }
172 }
173
174 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
175 if (ret) {
176 return ret;
177 }
178
179 sregs.pvr = cenv->spr[SPR_PVR];
180 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
181 }
182
183 /* Set up a shared TLB array with KVM */
184 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
185 {
186 CPUPPCState *env = &cpu->env;
187 CPUState *cs = CPU(cpu);
188 struct kvm_book3e_206_tlb_params params = {};
189 struct kvm_config_tlb cfg = {};
190 unsigned int entries = 0;
191 int ret, i;
192
193 if (!kvm_enabled() ||
194 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
195 return 0;
196 }
197
198 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
199
200 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
201 params.tlb_sizes[i] = booke206_tlb_size(env, i);
202 params.tlb_ways[i] = booke206_tlb_ways(env, i);
203 entries += params.tlb_sizes[i];
204 }
205
206 assert(entries == env->nb_tlb);
207 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
208
209 env->tlb_dirty = true;
210
211 cfg.array = (uintptr_t)env->tlb.tlbm;
212 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
213 cfg.params = (uintptr_t)&params;
214 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
215
216 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
217 if (ret < 0) {
218 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
219 __func__, strerror(-ret));
220 return ret;
221 }
222
223 env->kvm_sw_tlb = true;
224 return 0;
225 }
226
227
228 #if defined(TARGET_PPC64)
229 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
230 struct kvm_ppc_smmu_info *info)
231 {
232 CPUPPCState *env = &cpu->env;
233 CPUState *cs = CPU(cpu);
234
235 memset(info, 0, sizeof(*info));
236
237 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
238 * we need to "guess" what the supported page sizes are.
239 *
240 * For that to work we make a few assumptions:
241 *
242 * - Check whether we are running "PR" KVM which only supports 4K
243 * and 16M pages, but supports them regardless of the backing
244 * store characteristics. We also don't support 1T segments.
245 *
246 * This is safe as if HV KVM ever supports that capability or PR
247 * KVM grows support for more page/segment sizes, those versions
248 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
249 * will not hit this fallback
250 *
251 * - Else we are running HV KVM. This means we only support page
252 * sizes that fit in the backing store. Additionally we only
253 * advertise 64K pages if the processor is ARCH 2.06 and we assume
254 * P7 encodings for the SLB and hash table. Here too, we assume
255 * support for any newer processor will mean a kernel that
256 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
257 * this fallback.
258 */
259 if (kvmppc_is_pr(cs->kvm_state)) {
260 /* No flags */
261 info->flags = 0;
262 info->slb_size = 64;
263
264 /* Standard 4k base page size segment */
265 info->sps[0].page_shift = 12;
266 info->sps[0].slb_enc = 0;
267 info->sps[0].enc[0].page_shift = 12;
268 info->sps[0].enc[0].pte_enc = 0;
269
270 /* Standard 16M large page size segment */
271 info->sps[1].page_shift = 24;
272 info->sps[1].slb_enc = SLB_VSID_L;
273 info->sps[1].enc[0].page_shift = 24;
274 info->sps[1].enc[0].pte_enc = 0;
275 } else {
276 int i = 0;
277
278 /* HV KVM has backing store size restrictions */
279 info->flags = KVM_PPC_PAGE_SIZES_REAL;
280
281 if (env->mmu_model & POWERPC_MMU_1TSEG) {
282 info->flags |= KVM_PPC_1T_SEGMENTS;
283 }
284
285 if (env->mmu_model == POWERPC_MMU_2_06 ||
286 env->mmu_model == POWERPC_MMU_2_07) {
287 info->slb_size = 32;
288 } else {
289 info->slb_size = 64;
290 }
291
292 /* Standard 4k base page size segment */
293 info->sps[i].page_shift = 12;
294 info->sps[i].slb_enc = 0;
295 info->sps[i].enc[0].page_shift = 12;
296 info->sps[i].enc[0].pte_enc = 0;
297 i++;
298
299 /* 64K on MMU 2.06 and later */
300 if (env->mmu_model == POWERPC_MMU_2_06 ||
301 env->mmu_model == POWERPC_MMU_2_07) {
302 info->sps[i].page_shift = 16;
303 info->sps[i].slb_enc = 0x110;
304 info->sps[i].enc[0].page_shift = 16;
305 info->sps[i].enc[0].pte_enc = 1;
306 i++;
307 }
308
309 /* Standard 16M large page size segment */
310 info->sps[i].page_shift = 24;
311 info->sps[i].slb_enc = SLB_VSID_L;
312 info->sps[i].enc[0].page_shift = 24;
313 info->sps[i].enc[0].pte_enc = 0;
314 }
315 }
316
317 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
318 {
319 CPUState *cs = CPU(cpu);
320 int ret;
321
322 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
323 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
324 if (ret == 0) {
325 return;
326 }
327 }
328
329 kvm_get_fallback_smmu_info(cpu, info);
330 }
331
332 static long gethugepagesize(const char *mem_path)
333 {
334 struct statfs fs;
335 int ret;
336
337 do {
338 ret = statfs(mem_path, &fs);
339 } while (ret != 0 && errno == EINTR);
340
341 if (ret != 0) {
342 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
343 strerror(errno));
344 exit(1);
345 }
346
347 #define HUGETLBFS_MAGIC 0x958458f6
348
349 if (fs.f_type != HUGETLBFS_MAGIC) {
350 /* Explicit mempath, but it's ordinary pages */
351 return getpagesize();
352 }
353
354 /* It's hugetlbfs; return the huge page size */
355 return fs.f_bsize;
356 }
357
358 /*
359 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
360 * may or may not name the same files / on the same filesystem now as
361 * when we actually open and map them. Iterate over the file
362 * descriptors instead, and use qemu_fd_getpagesize().
363 */
364 static int find_max_supported_pagesize(Object *obj, void *opaque)
365 {
366 char *mem_path;
367 long *hpsize_min = opaque;
368
369 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
370 mem_path = object_property_get_str(obj, "mem-path", NULL);
371 if (mem_path) {
372 long hpsize = gethugepagesize(mem_path);
373 if (hpsize < *hpsize_min) {
374 *hpsize_min = hpsize;
375 }
376 } else {
377 *hpsize_min = getpagesize();
378 }
379 }
380
381 return 0;
382 }
383
384 static long getrampagesize(void)
385 {
386 long hpsize = LONG_MAX;
387 long mainrampagesize;
388 Object *memdev_root;
389
390 if (mem_path) {
391 mainrampagesize = gethugepagesize(mem_path);
392 } else {
393 mainrampagesize = getpagesize();
394 }
395
396 /* It's possible we have memory-backend objects with
397 * hugepage-backed RAM. These may get mapped into the system
398 * address space via -numa parameters or memory hotplug
399 * hooks. We want to take these into account, but we
400 * also want to make sure the supported hugepage
401 * sizes are applicable across the entire range of memory
402 * we may boot from, so we take the minimum across all
403 * backends, and assume normal pages in cases where a
404 * backend isn't backed by hugepages.
405 */
406 memdev_root = object_resolve_path("/objects", NULL);
407 if (memdev_root) {
408 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
409 }
410 if (hpsize == LONG_MAX) {
411 /* No additional memory regions found ==> Report main RAM page size */
412 return mainrampagesize;
413 }
414
415 /* If NUMA is disabled or the NUMA nodes are not backed with a
416 * memory-backend, then there is at least one node using "normal" RAM,
417 * so if its page size is smaller we have to report that size instead.
418 */
419 if (hpsize > mainrampagesize &&
420 (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
421 static bool warned;
422 if (!warned) {
423 error_report("Huge page support disabled (n/a for main memory).");
424 warned = true;
425 }
426 return mainrampagesize;
427 }
428
429 return hpsize;
430 }
431
432 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
433 {
434 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
435 return true;
436 }
437
438 return (1ul << shift) <= rampgsize;
439 }
440
441 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
442 {
443 static struct kvm_ppc_smmu_info smmu_info;
444 static bool has_smmu_info;
445 CPUPPCState *env = &cpu->env;
446 long rampagesize;
447 int iq, ik, jq, jk;
448 bool has_64k_pages = false;
449
450 /* We only handle page sizes for 64-bit server guests for now */
451 if (!(env->mmu_model & POWERPC_MMU_64)) {
452 return;
453 }
454
455 /* Collect MMU info from kernel if not already */
456 if (!has_smmu_info) {
457 kvm_get_smmu_info(cpu, &smmu_info);
458 has_smmu_info = true;
459 }
460
461 rampagesize = getrampagesize();
462
463 /* Convert to QEMU form */
464 memset(&env->sps, 0, sizeof(env->sps));
465
466 /* If we have HV KVM, we need to forbid CI large pages if our
467 * host page size is smaller than 64K.
468 */
469 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
470 env->ci_large_pages = getpagesize() >= 0x10000;
471 }
472
473 /*
474 * XXX This loop should be an entry wide AND of the capabilities that
475 * the selected CPU has with the capabilities that KVM supports.
476 */
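/* ik/jk walk KVM's advertised segment/page sizes while iq/jq fill QEMU's
 * table; when KVM_PPC_PAGE_SIZES_REAL is set, sizes larger than the
 * backing RAM page size are skipped (see kvm_valid_page_size). */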
477 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
478 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
479 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
480
481 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
482 ksps->page_shift)) {
483 continue;
484 }
485 qsps->page_shift = ksps->page_shift;
486 qsps->slb_enc = ksps->slb_enc;
487 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
488 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
489 ksps->enc[jk].page_shift)) {
490 continue;
491 }
492 if (ksps->enc[jk].page_shift == 16) {
493 has_64k_pages = true;
494 }
495 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
496 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
497 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
498 break;
499 }
500 }
501 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
502 break;
503 }
504 }
505 env->slb_nr = smmu_info.slb_size;
506 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
507 env->mmu_model &= ~POWERPC_MMU_1TSEG;
508 }
509 if (!has_64k_pages) {
510 env->mmu_model &= ~POWERPC_MMU_64K;
511 }
512 }
513 #else /* defined (TARGET_PPC64) */
514
515 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
516 {
517 }
518
519 #endif /* !defined (TARGET_PPC64) */
520
521 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
522 {
523 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
524 }
525
526 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
527 * book3s supports only 1 watchpoint, so an array size
528 * of 4 is sufficient for now.
529 */
530 #define MAX_HW_BKPTS 4
531
532 static struct HWBreakpoint {
533 target_ulong addr;
534 int type;
535 } hw_debug_points[MAX_HW_BKPTS];
536
537 static CPUWatchpoint hw_watchpoint;
538
539 /* By default, no breakpoints or watchpoints are supported */
540 static int max_hw_breakpoint;
541 static int max_hw_watchpoint;
542 static int nb_hw_breakpoint;
543 static int nb_hw_watchpoint;
544
545 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
546 {
547 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
548 max_hw_breakpoint = 2;
549 max_hw_watchpoint = 2;
550 }
551
552 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
553 fprintf(stderr, "Error initializing h/w breakpoints\n");
554 return;
555 }
556 }
557
558 int kvm_arch_init_vcpu(CPUState *cs)
559 {
560 PowerPCCPU *cpu = POWERPC_CPU(cs);
561 CPUPPCState *cenv = &cpu->env;
562 int ret;
563
564 /* Gather server mmu info from KVM and update the CPU state */
565 kvm_fixup_page_sizes(cpu);
566
567 /* Synchronize sregs with kvm */
568 ret = kvm_arch_sync_sregs(cpu);
569 if (ret) {
570 if (ret == -EINVAL) {
571 error_report("Register sync failed... If you're using kvm-hv.ko,"
572 " only \"-cpu host\" is possible");
573 }
574 return ret;
575 }
576
577 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
578
579 switch (cenv->mmu_model) {
580 case POWERPC_MMU_BOOKE206:
581 /* This target supports access to KVM's guest TLB */
582 ret = kvm_booke206_tlb_init(cpu);
583 break;
584 case POWERPC_MMU_2_07:
585 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
586 /* KVM-HV has transactional memory on POWER8 even without the
587 * KVM_CAP_PPC_HTM extension, so enable it here instead. */
588 cap_htm = true;
589 }
590 break;
591 default:
592 break;
593 }
594
595 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
596 kvmppc_hw_debug_points_init(cenv);
597
598 return ret;
599 }
600
601 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
602 {
603 CPUPPCState *env = &cpu->env;
604 CPUState *cs = CPU(cpu);
605 struct kvm_dirty_tlb dirty_tlb;
606 unsigned char *bitmap;
607 int ret;
608
609 if (!env->kvm_sw_tlb) {
610 return;
611 }
612
613 bitmap = g_malloc((env->nb_tlb + 7) / 8);
614 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
615
616 dirty_tlb.bitmap = (uintptr_t)bitmap;
617 dirty_tlb.num_dirty = env->nb_tlb;
618
619 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
620 if (ret) {
621 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
622 __func__, strerror(-ret));
623 }
624
625 g_free(bitmap);
626 }
627
628 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
629 {
630 PowerPCCPU *cpu = POWERPC_CPU(cs);
631 CPUPPCState *env = &cpu->env;
632 union {
633 uint32_t u32;
634 uint64_t u64;
635 } val;
636 struct kvm_one_reg reg = {
637 .id = id,
638 .addr = (uintptr_t) &val,
639 };
640 int ret;
641
642 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
643 if (ret != 0) {
644 trace_kvm_failed_spr_get(spr, strerror(errno));
645 } else {
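/* The ONE_REG id encodes the register width, so it tells us which
 * member of the union KVM has filled in. */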
646 switch (id & KVM_REG_SIZE_MASK) {
647 case KVM_REG_SIZE_U32:
648 env->spr[spr] = val.u32;
649 break;
650
651 case KVM_REG_SIZE_U64:
652 env->spr[spr] = val.u64;
653 break;
654
655 default:
656 /* Don't handle this size yet */
657 abort();
658 }
659 }
660 }
661
662 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
663 {
664 PowerPCCPU *cpu = POWERPC_CPU(cs);
665 CPUPPCState *env = &cpu->env;
666 union {
667 uint32_t u32;
668 uint64_t u64;
669 } val;
670 struct kvm_one_reg reg = {
671 .id = id,
672 .addr = (uintptr_t) &val,
673 };
674 int ret;
675
676 switch (id & KVM_REG_SIZE_MASK) {
677 case KVM_REG_SIZE_U32:
678 val.u32 = env->spr[spr];
679 break;
680
681 case KVM_REG_SIZE_U64:
682 val.u64 = env->spr[spr];
683 break;
684
685 default:
686 /* Don't handle this size yet */
687 abort();
688 }
689
690 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
691 if (ret != 0) {
692 trace_kvm_failed_spr_set(spr, strerror(errno));
693 }
694 }
695
696 static int kvm_put_fp(CPUState *cs)
697 {
698 PowerPCCPU *cpu = POWERPC_CPU(cs);
699 CPUPPCState *env = &cpu->env;
700 struct kvm_one_reg reg;
701 int i;
702 int ret;
703
704 if (env->insns_flags & PPC_FLOAT) {
705 uint64_t fpscr = env->fpscr;
706 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
707
708 reg.id = KVM_REG_PPC_FPSCR;
709 reg.addr = (uintptr_t)&fpscr;
710 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
711 if (ret < 0) {
712 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
713 return ret;
714 }
715
716 for (i = 0; i < 32; i++) {
717 uint64_t vsr[2];
718
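/* env->fpr[i] holds the most significant doubleword of the 128-bit VSX
 * register and env->vsr[i] the least significant one; the #ifdef below
 * orders the pair as the kernel expects for the host's endianness. */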
719 #ifdef HOST_WORDS_BIGENDIAN
720 vsr[0] = float64_val(env->fpr[i]);
721 vsr[1] = env->vsr[i];
722 #else
723 vsr[0] = env->vsr[i];
724 vsr[1] = float64_val(env->fpr[i]);
725 #endif
726 reg.addr = (uintptr_t) &vsr;
727 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
728
729 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
730 if (ret < 0) {
731 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
732 i, strerror(errno));
733 return ret;
734 }
735 }
736 }
737
738 if (env->insns_flags & PPC_ALTIVEC) {
739 reg.id = KVM_REG_PPC_VSCR;
740 reg.addr = (uintptr_t)&env->vscr;
741 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
742 if (ret < 0) {
743 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
744 return ret;
745 }
746
747 for (i = 0; i < 32; i++) {
748 reg.id = KVM_REG_PPC_VR(i);
749 reg.addr = (uintptr_t)&env->avr[i];
750 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
751 if (ret < 0) {
752 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
753 return ret;
754 }
755 }
756 }
757
758 return 0;
759 }
760
761 static int kvm_get_fp(CPUState *cs)
762 {
763 PowerPCCPU *cpu = POWERPC_CPU(cs);
764 CPUPPCState *env = &cpu->env;
765 struct kvm_one_reg reg;
766 int i;
767 int ret;
768
769 if (env->insns_flags & PPC_FLOAT) {
770 uint64_t fpscr;
771 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
772
773 reg.id = KVM_REG_PPC_FPSCR;
774 reg.addr = (uintptr_t)&fpscr;
775 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
776 if (ret < 0) {
777 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
778 return ret;
779 } else {
780 env->fpscr = fpscr;
781 }
782
783 for (i = 0; i < 32; i++) {
784 uint64_t vsr[2];
785
786 reg.addr = (uintptr_t) &vsr;
787 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
788
789 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
790 if (ret < 0) {
791 DPRINTF("Unable to get %s%d from KVM: %s\n",
792 vsx ? "VSR" : "FPR", i, strerror(errno));
793 return ret;
794 } else {
795 #ifdef HOST_WORDS_BIGENDIAN
796 env->fpr[i] = vsr[0];
797 if (vsx) {
798 env->vsr[i] = vsr[1];
799 }
800 #else
801 env->fpr[i] = vsr[1];
802 if (vsx) {
803 env->vsr[i] = vsr[0];
804 }
805 #endif
806 }
807 }
808 }
809
810 if (env->insns_flags & PPC_ALTIVEC) {
811 reg.id = KVM_REG_PPC_VSCR;
812 reg.addr = (uintptr_t)&env->vscr;
813 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
814 if (ret < 0) {
815 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
816 return ret;
817 }
818
819 for (i = 0; i < 32; i++) {
820 reg.id = KVM_REG_PPC_VR(i);
821 reg.addr = (uintptr_t)&env->avr[i];
822 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
823 if (ret < 0) {
824 DPRINTF("Unable to get VR%d from KVM: %s\n",
825 i, strerror(errno));
826 return ret;
827 }
828 }
829 }
830
831 return 0;
832 }
833
834 #if defined(TARGET_PPC64)
835 static int kvm_get_vpa(CPUState *cs)
836 {
837 PowerPCCPU *cpu = POWERPC_CPU(cs);
838 CPUPPCState *env = &cpu->env;
839 struct kvm_one_reg reg;
840 int ret;
841
842 reg.id = KVM_REG_PPC_VPA_ADDR;
843 reg.addr = (uintptr_t)&env->vpa_addr;
844 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
845 if (ret < 0) {
846 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
847 return ret;
848 }
849
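/* KVM transfers the SLB shadow and DTL registrations as a single
 * (address, length) pair, so the size field must sit directly after the
 * address field in CPUPPCState for these ONE_REG accesses to work. */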
850 assert((uintptr_t)&env->slb_shadow_size
851 == ((uintptr_t)&env->slb_shadow_addr + 8));
852 reg.id = KVM_REG_PPC_VPA_SLB;
853 reg.addr = (uintptr_t)&env->slb_shadow_addr;
854 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
855 if (ret < 0) {
856 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
857 strerror(errno));
858 return ret;
859 }
860
861 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
862 reg.id = KVM_REG_PPC_VPA_DTL;
863 reg.addr = (uintptr_t)&env->dtl_addr;
864 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
865 if (ret < 0) {
866 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
867 strerror(errno));
868 return ret;
869 }
870
871 return 0;
872 }
873
874 static int kvm_put_vpa(CPUState *cs)
875 {
876 PowerPCCPU *cpu = POWERPC_CPU(cs);
877 CPUPPCState *env = &cpu->env;
878 struct kvm_one_reg reg;
879 int ret;
880
881 /* SLB shadow or DTL can't be registered unless a master VPA is
882 * registered. That means when restoring state, if a VPA *is*
883 * registered, we need to set that up first. If not, we need to
884 * deregister the others before deregistering the master VPA */
885 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
886
887 if (env->vpa_addr) {
888 reg.id = KVM_REG_PPC_VPA_ADDR;
889 reg.addr = (uintptr_t)&env->vpa_addr;
890 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
891 if (ret < 0) {
892 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
893 return ret;
894 }
895 }
896
897 assert((uintptr_t)&env->slb_shadow_size
898 == ((uintptr_t)&env->slb_shadow_addr + 8));
899 reg.id = KVM_REG_PPC_VPA_SLB;
900 reg.addr = (uintptr_t)&env->slb_shadow_addr;
901 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
902 if (ret < 0) {
903 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
904 return ret;
905 }
906
907 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
908 reg.id = KVM_REG_PPC_VPA_DTL;
909 reg.addr = (uintptr_t)&env->dtl_addr;
910 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
911 if (ret < 0) {
912 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
913 strerror(errno));
914 return ret;
915 }
916
917 if (!env->vpa_addr) {
918 reg.id = KVM_REG_PPC_VPA_ADDR;
919 reg.addr = (uintptr_t)&env->vpa_addr;
920 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
921 if (ret < 0) {
922 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
923 return ret;
924 }
925 }
926
927 return 0;
928 }
929 #endif /* TARGET_PPC64 */
930
931 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
932 {
933 CPUPPCState *env = &cpu->env;
934 struct kvm_sregs sregs;
935 int i;
936
937 sregs.pvr = env->spr[SPR_PVR];
938
939 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
940
941 /* Sync SLB */
942 #ifdef TARGET_PPC64
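/* For valid entries the SLB slot number is merged into the low bits of
 * the ESID, matching the RB operand format used by slbmte. */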
943 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
944 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
945 if (env->slb[i].esid & SLB_ESID_V) {
946 sregs.u.s.ppc64.slb[i].slbe |= i;
947 }
948 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
949 }
950 #endif
951
952 /* Sync SRs */
953 for (i = 0; i < 16; i++) {
954 sregs.u.s.ppc32.sr[i] = env->sr[i];
955 }
956
957 /* Sync BATs */
958 for (i = 0; i < 8; i++) {
959 /* Beware: we have to swap the upper and lower halves here */
960 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
961 | env->DBAT[1][i];
962 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
963 | env->IBAT[1][i];
964 }
965
966 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
967 }
968
969 int kvm_arch_put_registers(CPUState *cs, int level)
970 {
971 PowerPCCPU *cpu = POWERPC_CPU(cs);
972 CPUPPCState *env = &cpu->env;
973 struct kvm_regs regs;
974 int ret;
975 int i;
976
977 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
978 if (ret < 0) {
979 return ret;
980 }
981
982 regs.ctr = env->ctr;
983 regs.lr = env->lr;
984 regs.xer = cpu_read_xer(env);
985 regs.msr = env->msr;
986 regs.pc = env->nip;
987
988 regs.srr0 = env->spr[SPR_SRR0];
989 regs.srr1 = env->spr[SPR_SRR1];
990
991 regs.sprg0 = env->spr[SPR_SPRG0];
992 regs.sprg1 = env->spr[SPR_SPRG1];
993 regs.sprg2 = env->spr[SPR_SPRG2];
994 regs.sprg3 = env->spr[SPR_SPRG3];
995 regs.sprg4 = env->spr[SPR_SPRG4];
996 regs.sprg5 = env->spr[SPR_SPRG5];
997 regs.sprg6 = env->spr[SPR_SPRG6];
998 regs.sprg7 = env->spr[SPR_SPRG7];
999
1000 regs.pid = env->spr[SPR_BOOKE_PID];
1001
1002 for (i = 0; i < 32; i++)
1003 regs.gpr[i] = env->gpr[i];
1004
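/* Repack the eight 4-bit condition register fields into the architected
 * 32-bit CR image, with CR0 in the most significant nibble. */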
1005 regs.cr = 0;
1006 for (i = 0; i < 8; i++) {
1007 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1008 }
1009
1010 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1011 if (ret < 0)
1012 return ret;
1013
1014 kvm_put_fp(cs);
1015
1016 if (env->tlb_dirty) {
1017 kvm_sw_tlb_put(cpu);
1018 env->tlb_dirty = false;
1019 }
1020
1021 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1022 ret = kvmppc_put_books_sregs(cpu);
1023 if (ret < 0) {
1024 return ret;
1025 }
1026 }
1027
1028 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1029 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1030 }
1031
1032 if (cap_one_reg) {
1033 int i;
1034
1035 /* We deliberately ignore errors here: for kernels which have
1036 * the ONE_REG calls but don't support the specific
1037 * registers, there's a reasonable chance things will still
1038 * work, at least until we try to migrate. */
1039 for (i = 0; i < 1024; i++) {
1040 uint64_t id = env->spr_cb[i].one_reg_id;
1041
1042 if (id != 0) {
1043 kvm_put_one_spr(cs, id, i);
1044 }
1045 }
1046
1047 #ifdef TARGET_PPC64
1048 if (msr_ts) {
1049 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1050 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1051 }
1052 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1053 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1054 }
1055 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1056 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1057 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1058 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1059 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1060 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1061 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1062 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1063 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1064 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1065 }
1066
1067 if (cap_papr) {
1068 if (kvm_put_vpa(cs) < 0) {
1069 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1070 }
1071 }
1072
1073 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1074 #endif /* TARGET_PPC64 */
1075 }
1076
1077 return ret;
1078 }
1079
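/* On BookE the effective exception vector is IVPR + IVORn; refresh
 * QEMU's cached vector table whenever those SPRs are read from KVM. */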
1080 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1081 {
1082 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1083 }
1084
1085 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1086 {
1087 CPUPPCState *env = &cpu->env;
1088 struct kvm_sregs sregs;
1089 int ret;
1090
1091 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1092 if (ret < 0) {
1093 return ret;
1094 }
1095
1096 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1097 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1098 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1099 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1100 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1101 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1102 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1103 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1104 env->spr[SPR_DECR] = sregs.u.e.dec;
1105 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1106 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1107 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1108 }
1109
1110 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1111 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1112 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1113 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1114 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1115 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1116 }
1117
1118 if (sregs.u.e.features & KVM_SREGS_E_64) {
1119 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1120 }
1121
1122 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1123 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1124 }
1125
1126 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1127 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1128 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1129 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1130 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1131 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1132 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1133 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1134 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1135 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1136 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1137 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1138 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1139 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1140 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1141 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1142 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1143 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1144 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1145 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1146 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1147 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1148 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1149 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1150 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1151 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1152 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1153 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1154 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1155 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1156 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1157 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1158 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1159
1160 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1161 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1162 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1163 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1164 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1165 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1166 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1167 }
1168
1169 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1170 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1171 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1172 }
1173
1174 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1175 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1176 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1177 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1178 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1179 }
1180 }
1181
1182 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1183 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1184 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1185 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1186 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1187 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1188 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1189 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1190 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1191 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1192 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1193 }
1194
1195 if (sregs.u.e.features & KVM_SREGS_EXP) {
1196 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1197 }
1198
1199 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1200 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1201 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1202 }
1203
1204 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1205 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1206 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1207 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1208
1209 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1210 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1211 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1212 }
1213 }
1214
1215 return 0;
1216 }
1217
1218 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1219 {
1220 CPUPPCState *env = &cpu->env;
1221 struct kvm_sregs sregs;
1222 int ret;
1223 int i;
1224
1225 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1226 if (ret < 0) {
1227 return ret;
1228 }
1229
1230 if (!env->external_htab) {
1231 ppc_store_sdr1(env, sregs.u.s.sdr1);
1232 }
1233
1234 /* Sync SLB */
1235 #ifdef TARGET_PPC64
1236 /*
1237 * The packed SLB array we get from KVM_GET_SREGS only contains
1238 * information about valid entries. So we flush our internal copy
1239 * to get rid of stale ones, then put all valid SLB entries back
1240 * in.
1241 */
1242 memset(env->slb, 0, sizeof(env->slb));
1243 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1244 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1245 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1246 /*
1247 * Only restore valid entries
1248 */
1249 if (rb & SLB_ESID_V) {
1250 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1251 }
1252 }
1253 #endif
1254
1255 /* Sync SRs */
1256 for (i = 0; i < 16; i++) {
1257 env->sr[i] = sregs.u.s.ppc32.sr[i];
1258 }
1259
1260 /* Sync BATs */
1261 for (i = 0; i < 8; i++) {
1262 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1263 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1264 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1265 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1266 }
1267
1268 return 0;
1269 }
1270
1271 int kvm_arch_get_registers(CPUState *cs)
1272 {
1273 PowerPCCPU *cpu = POWERPC_CPU(cs);
1274 CPUPPCState *env = &cpu->env;
1275 struct kvm_regs regs;
1276 uint32_t cr;
1277 int i, ret;
1278
1279 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1280 if (ret < 0)
1281 return ret;
1282
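/* Split the 32-bit CR image back into the eight 4-bit crf[] fields,
 * starting with CR7 in the least significant nibble. */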
1283 cr = regs.cr;
1284 for (i = 7; i >= 0; i--) {
1285 env->crf[i] = cr & 15;
1286 cr >>= 4;
1287 }
1288
1289 env->ctr = regs.ctr;
1290 env->lr = regs.lr;
1291 cpu_write_xer(env, regs.xer);
1292 env->msr = regs.msr;
1293 env->nip = regs.pc;
1294
1295 env->spr[SPR_SRR0] = regs.srr0;
1296 env->spr[SPR_SRR1] = regs.srr1;
1297
1298 env->spr[SPR_SPRG0] = regs.sprg0;
1299 env->spr[SPR_SPRG1] = regs.sprg1;
1300 env->spr[SPR_SPRG2] = regs.sprg2;
1301 env->spr[SPR_SPRG3] = regs.sprg3;
1302 env->spr[SPR_SPRG4] = regs.sprg4;
1303 env->spr[SPR_SPRG5] = regs.sprg5;
1304 env->spr[SPR_SPRG6] = regs.sprg6;
1305 env->spr[SPR_SPRG7] = regs.sprg7;
1306
1307 env->spr[SPR_BOOKE_PID] = regs.pid;
1308
1309 for (i = 0; i < 32; i++)
1310 env->gpr[i] = regs.gpr[i];
1311
1312 kvm_get_fp(cs);
1313
1314 if (cap_booke_sregs) {
1315 ret = kvmppc_get_booke_sregs(cpu);
1316 if (ret < 0) {
1317 return ret;
1318 }
1319 }
1320
1321 if (cap_segstate) {
1322 ret = kvmppc_get_books_sregs(cpu);
1323 if (ret < 0) {
1324 return ret;
1325 }
1326 }
1327
1328 if (cap_hior) {
1329 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1330 }
1331
1332 if (cap_one_reg) {
1333 int i;
1334
1335 /* We deliberately ignore errors here: for kernels which have
1336 * the ONE_REG calls but don't support the specific
1337 * registers, there's a reasonable chance things will still
1338 * work, at least until we try to migrate. */
1339 for (i = 0; i < 1024; i++) {
1340 uint64_t id = env->spr_cb[i].one_reg_id;
1341
1342 if (id != 0) {
1343 kvm_get_one_spr(cs, id, i);
1344 }
1345 }
1346
1347 #ifdef TARGET_PPC64
1348 if (msr_ts) {
1349 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1350 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1351 }
1352 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1353 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1354 }
1355 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1356 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1357 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1358 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1359 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1360 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1361 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1362 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1363 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1364 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1365 }
1366
1367 if (cap_papr) {
1368 if (kvm_get_vpa(cs) < 0) {
1369 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1370 }
1371 }
1372
1373 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1374 #endif
1375 }
1376
1377 return 0;
1378 }
1379
1380 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1381 {
1382 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1383
1384 if (irq != PPC_INTERRUPT_EXT) {
1385 return 0;
1386 }
1387
1388 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1389 return 0;
1390 }
1391
1392 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1393
1394 return 0;
1395 }
1396
1397 #if defined(TARGET_PPCEMB)
1398 #define PPC_INPUT_INT PPC40x_INPUT_INT
1399 #elif defined(TARGET_PPC64)
1400 #define PPC_INPUT_INT PPC970_INPUT_INT
1401 #else
1402 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1403 #endif
1404
1405 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1406 {
1407 PowerPCCPU *cpu = POWERPC_CPU(cs);
1408 CPUPPCState *env = &cpu->env;
1409 int r;
1410 unsigned irq;
1411
1412 qemu_mutex_lock_iothread();
1413
1414 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1415 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1416 if (!cap_interrupt_level &&
1417 run->ready_for_interrupt_injection &&
1418 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1419 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1420 {
1421 /* For now KVM disregards the 'irq' argument. However, in the
1422 * future KVM could cache it in-kernel to avoid a heavyweight exit
1423 * when reading the UIC.
1424 */
1425 irq = KVM_INTERRUPT_SET;
1426
1427 DPRINTF("injected interrupt %d\n", irq);
1428 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1429 if (r < 0) {
1430 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1431 }
1432
1433 /* Always wake up soon in case the interrupt was level based */
1434 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1435 (NANOSECONDS_PER_SECOND / 50));
1436 }
1437
1438 /* We don't know if there are more interrupts pending after this. However,
1439 * the guest will return to userspace in the course of handling this one
1440 * anyway, so we will get a chance to deliver the rest. */
1441
1442 qemu_mutex_unlock_iothread();
1443 }
1444
1445 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1446 {
1447 return MEMTXATTRS_UNSPECIFIED;
1448 }
1449
1450 int kvm_arch_process_async_events(CPUState *cs)
1451 {
1452 return cs->halted;
1453 }
1454
1455 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1456 {
1457 CPUState *cs = CPU(cpu);
1458 CPUPPCState *env = &cpu->env;
1459
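/* Only go idle if external interrupts are enabled (MSR_EE) and none is
 * pending; otherwise keep running so the interrupt can be delivered. */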
1460 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1461 cs->halted = 1;
1462 cs->exception_index = EXCP_HLT;
1463 }
1464
1465 return 0;
1466 }
1467
1468 /* map dcr access to existing qemu dcr emulation */
1469 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1470 {
1471 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1472 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1473
1474 return 0;
1475 }
1476
1477 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1478 {
1479 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1480 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1481
1482 return 0;
1483 }
1484
1485 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1486 {
1487 /* Mixed endian case is not handled */
1488 uint32_t sc = debug_inst_opcode;
1489
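/* Save the guest's original instruction in bp->saved_insn, then patch in
 * the KVM-provided trap opcode that was read at vcpu init. */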
1490 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1491 sizeof(sc), 0) ||
1492 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1493 return -EINVAL;
1494 }
1495
1496 return 0;
1497 }
1498
1499 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1500 {
1501 uint32_t sc;
1502
1503 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1504 sc != debug_inst_opcode ||
1505 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1506 sizeof(sc), 1)) {
1507 return -EINVAL;
1508 }
1509
1510 return 0;
1511 }
1512
1513 static int find_hw_breakpoint(target_ulong addr, int type)
1514 {
1515 int n;
1516
1517 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1518 <= ARRAY_SIZE(hw_debug_points));
1519
1520 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1521 if (hw_debug_points[n].addr == addr &&
1522 hw_debug_points[n].type == type) {
1523 return n;
1524 }
1525 }
1526
1527 return -1;
1528 }
1529
1530 static int find_hw_watchpoint(target_ulong addr, int *flag)
1531 {
1532 int n;
1533
1534 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1535 if (n >= 0) {
1536 *flag = BP_MEM_ACCESS;
1537 return n;
1538 }
1539
1540 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1541 if (n >= 0) {
1542 *flag = BP_MEM_WRITE;
1543 return n;
1544 }
1545
1546 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1547 if (n >= 0) {
1548 *flag = BP_MEM_READ;
1549 return n;
1550 }
1551
1552 return -1;
1553 }
1554
1555 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1556 target_ulong len, int type)
1557 {
1558 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1559 return -ENOBUFS;
1560 }
1561
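/* Provisionally record the new entry in the next free slot; the per-type
 * checks below only commit it by bumping the matching counter. */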
1562 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1563 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1564
1565 switch (type) {
1566 case GDB_BREAKPOINT_HW:
1567 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1568 return -ENOBUFS;
1569 }
1570
1571 if (find_hw_breakpoint(addr, type) >= 0) {
1572 return -EEXIST;
1573 }
1574
1575 nb_hw_breakpoint++;
1576 break;
1577
1578 case GDB_WATCHPOINT_WRITE:
1579 case GDB_WATCHPOINT_READ:
1580 case GDB_WATCHPOINT_ACCESS:
1581 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1582 return -ENOBUFS;
1583 }
1584
1585 if (find_hw_breakpoint(addr, type) >= 0) {
1586 return -EEXIST;
1587 }
1588
1589 nb_hw_watchpoint++;
1590 break;
1591
1592 default:
1593 return -ENOSYS;
1594 }
1595
1596 return 0;
1597 }
1598
1599 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1600 target_ulong len, int type)
1601 {
1602 int n;
1603
1604 n = find_hw_breakpoint(addr, type);
1605 if (n < 0) {
1606 return -ENOENT;
1607 }
1608
1609 switch (type) {
1610 case GDB_BREAKPOINT_HW:
1611 nb_hw_breakpoint--;
1612 break;
1613
1614 case GDB_WATCHPOINT_WRITE:
1615 case GDB_WATCHPOINT_READ:
1616 case GDB_WATCHPOINT_ACCESS:
1617 nb_hw_watchpoint--;
1618 break;
1619
1620 default:
1621 return -ENOSYS;
1622 }
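/* Keep the array dense by moving the last entry into the freed slot. */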
1623 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1624
1625 return 0;
1626 }
1627
1628 void kvm_arch_remove_all_hw_breakpoints(void)
1629 {
1630 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1631 }
1632
1633 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1634 {
1635 int n;
1636
1637 /* Software Breakpoint updates */
1638 if (kvm_sw_breakpoints_active(cs)) {
1639 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1640 }
1641
1642 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1643 <= ARRAY_SIZE(hw_debug_points));
1644 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1645
1646 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1647 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1648 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1649 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1650 switch (hw_debug_points[n].type) {
1651 case GDB_BREAKPOINT_HW:
1652 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1653 break;
1654 case GDB_WATCHPOINT_WRITE:
1655 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1656 break;
1657 case GDB_WATCHPOINT_READ:
1658 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1659 break;
1660 case GDB_WATCHPOINT_ACCESS:
1661 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1662 KVMPPC_DEBUG_WATCH_READ;
1663 break;
1664 default:
1665 cpu_abort(cs, "Unsupported breakpoint type\n");
1666 }
1667 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1668 }
1669 }
1670 }
1671
1672 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1673 {
1674 CPUState *cs = CPU(cpu);
1675 CPUPPCState *env = &cpu->env;
1676 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1677 int handle = 0;
1678 int n;
1679 int flag = 0;
1680
1681 if (cs->singlestep_enabled) {
1682 handle = 1;
1683 } else if (arch_info->status) {
1684 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1685 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1686 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1687 if (n >= 0) {
1688 handle = 1;
1689 }
1690 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1691 KVMPPC_DEBUG_WATCH_WRITE)) {
1692 n = find_hw_watchpoint(arch_info->address, &flag);
1693 if (n >= 0) {
1694 handle = 1;
1695 cs->watchpoint_hit = &hw_watchpoint;
1696 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1697 hw_watchpoint.flags = flag;
1698 }
1699 }
1700 }
1701 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1702 handle = 1;
1703 } else {
1704 /* QEMU is not able to handle this debug exception, so inject a
1705 * program exception into the guest;
1706 * yes, a program exception, NOT a debug exception!
1707 * When QEMU is using the debug resources, the debug exception must
1708 * always be set. To achieve this we set MSR_DE and also set
1709 * MSRP_DEP so the guest cannot change MSR_DE.
1710 * When emulating debug resources for the guest we want the guest
1711 * to control MSR_DE (enable/disable the debug interrupt as needed).
1712 * Supporting both configurations is NOT possible.
1713 * So the result is that we cannot share debug resources
1714 * between QEMU and the guest on the BookE architecture.
1715 * In the current design QEMU gets priority over the guest:
1716 * this means that if QEMU is using the debug resources then the
1717 * guest cannot use them;
1718 * for software breakpoints QEMU uses a privileged instruction,
1719 * so there is no way we get here because the guest raised a
1720 * debug exception on its own; the only possibility is that the
1721 * guest executed a privileged / illegal instruction, which is why
1722 * we inject a program interrupt.
1723 */
1724
1725 cpu_synchronize_state(cs);
1726 /* env->nip is PC, so increment this by 4 to use
1727 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1728 */
1729 env->nip += 4;
1730 cs->exception_index = POWERPC_EXCP_PROGRAM;
1731 env->error_code = POWERPC_EXCP_INVAL;
1732 ppc_cpu_do_interrupt(cs);
1733 }
1734
1735 return handle;
1736 }
1737
1738 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1739 {
1740 PowerPCCPU *cpu = POWERPC_CPU(cs);
1741 CPUPPCState *env = &cpu->env;
1742 int ret;
1743
1744 qemu_mutex_lock_iothread();
1745
1746 switch (run->exit_reason) {
1747 case KVM_EXIT_DCR:
1748 if (run->dcr.is_write) {
1749 DPRINTF("handle dcr write\n");
1750 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1751 } else {
1752 DPRINTF("handle dcr read\n");
1753 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1754 }
1755 break;
1756 case KVM_EXIT_HLT:
1757 DPRINTF("handle halt\n");
1758 ret = kvmppc_handle_halt(cpu);
1759 break;
1760 #if defined(TARGET_PPC64)
1761 case KVM_EXIT_PAPR_HCALL:
1762 DPRINTF("handle PAPR hypercall\n");
1763 run->papr_hcall.ret = spapr_hypercall(cpu,
1764 run->papr_hcall.nr,
1765 run->papr_hcall.args);
1766 ret = 0;
1767 break;
1768 #endif
1769 case KVM_EXIT_EPR:
1770 DPRINTF("handle epr\n");
1771 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1772 ret = 0;
1773 break;
1774 case KVM_EXIT_WATCHDOG:
1775 DPRINTF("handle watchdog expiry\n");
1776 watchdog_perform_action();
1777 ret = 0;
1778 break;
1779
1780 case KVM_EXIT_DEBUG:
1781 DPRINTF("handle debug exception\n");
1782 if (kvm_handle_debug(cpu, run)) {
1783 ret = EXCP_DEBUG;
1784 break;
1785 }
1786 /* re-enter, this exception was guest-internal */
1787 ret = 0;
1788 break;
1789
1790 default:
1791 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1792 ret = -1;
1793 break;
1794 }
1795
1796 qemu_mutex_unlock_iothread();
1797 return ret;
1798 }
1799
1800 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1801 {
1802 CPUState *cs = CPU(cpu);
1803 uint32_t bits = tsr_bits;
1804 struct kvm_one_reg reg = {
1805 .id = KVM_REG_PPC_OR_TSR,
1806 .addr = (uintptr_t) &bits,
1807 };
1808
1809 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1810 }
1811
1812 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1813 {
1814
1815 CPUState *cs = CPU(cpu);
1816 uint32_t bits = tsr_bits;
1817 struct kvm_one_reg reg = {
1818 .id = KVM_REG_PPC_CLEAR_TSR,
1819 .addr = (uintptr_t) &bits,
1820 };
1821
1822 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1823 }
1824
1825 int kvmppc_set_tcr(PowerPCCPU *cpu)
1826 {
1827 CPUState *cs = CPU(cpu);
1828 CPUPPCState *env = &cpu->env;
1829 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1830
1831 struct kvm_one_reg reg = {
1832 .id = KVM_REG_PPC_TCR,
1833 .addr = (uintptr_t) &tcr,
1834 };
1835
1836 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1837 }
1838
1839 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1840 {
1841 CPUState *cs = CPU(cpu);
1842 int ret;
1843
1844 if (!kvm_enabled()) {
1845 return -1;
1846 }
1847
1848 if (!cap_ppc_watchdog) {
1849 printf("warning: KVM does not support watchdog\n");
1850 return -1;
1851 }
1852
1853 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1854 if (ret < 0) {
1855 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1856 __func__, strerror(-ret));
1857 return ret;
1858 }
1859
1860 return ret;
1861 }
1862
1863 static int read_cpuinfo(const char *field, char *value, int len)
1864 {
1865 FILE *f;
1866 int ret = -1;
1867 int field_len = strlen(field);
1868 char line[512];
1869
1870 f = fopen("/proc/cpuinfo", "r");
1871 if (!f) {
1872 return -1;
1873 }
1874
1875 do {
1876 if (!fgets(line, sizeof(line), f)) {
1877 break;
1878 }
1879 if (!strncmp(line, field, field_len)) {
1880 pstrcpy(value, len, line);
1881 ret = 0;
1882 break;
1883 }
1884 } while (*line);
1885
1886 fclose(f);
1887
1888 return ret;
1889 }
1890
1891 uint32_t kvmppc_get_tbfreq(void)
1892 {
1893 char line[512];
1894 char *ns;
1895 uint32_t retval = NANOSECONDS_PER_SECOND;
1896
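/* On ppc hosts /proc/cpuinfo reports a "timebase : <frequency>" line;
 * parse the value after the colon and fall back to 10^9 if absent. */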
1897 if (read_cpuinfo("timebase", line, sizeof(line))) {
1898 return retval;
1899 }
1900
1901 if (!(ns = strchr(line, ':'))) {
1902 return retval;
1903 }
1904
1905 ns++;
1906
1907 return atoi(ns);
1908 }
1909
1910 bool kvmppc_get_host_serial(char **value)
1911 {
1912 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1913 NULL);
1914 }
1915
1916 bool kvmppc_get_host_model(char **value)
1917 {
1918 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1919 }
1920
1921 /* Try to find a device tree node for a CPU with clock-frequency property */
1922 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1923 {
1924 struct dirent *dirp;
1925 DIR *dp;
1926
1927 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1928 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1929 return -1;
1930 }
1931
1932 buf[0] = '\0';
1933 while ((dirp = readdir(dp)) != NULL) {
1934 FILE *f;
1935 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1936 dirp->d_name);
1937 f = fopen(buf, "r");
1938 if (f) {
1939 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1940 fclose(f);
1941 break;
1942 }
1943 buf[0] = '\0';
1944 }
1945 closedir(dp);
1946 if (buf[0] == '\0') {
1947 printf("Unknown host!\n");
1948 return -1;
1949 }
1950
1951 return 0;
1952 }
1953
1954 static uint64_t kvmppc_read_int_dt(const char *filename)
1955 {
1956 union {
1957 uint32_t v32;
1958 uint64_t v64;
1959 } u;
1960 FILE *f;
1961 int len;
1962
1963 f = fopen(filename, "rb");
1964 if (!f) {
1965 return -1;
1966 }
1967
1968 len = fread(&u, 1, sizeof(u), f);
1969 fclose(f);
1970 switch (len) {
1971 case 4:
1972 /* property is a 32-bit quantity */
1973 return be32_to_cpu(u.v32);
1974 case 8:
1975 return be64_to_cpu(u.v64);
1976 }
1977
1978 return 0;
1979 }
1980
1981 /* Read a CPU node property from the host device tree that's a single
1982 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1983 * (can't find or open the property, or doesn't understand the
1984 * format) */
1985 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1986 {
1987 char buf[PATH_MAX], *tmp;
1988 uint64_t val;
1989
1990 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1991 return -1;
1992 }
1993
1994 tmp = g_strdup_printf("%s/%s", buf, propname);
1995 val = kvmppc_read_int_dt(tmp);
1996 g_free(tmp);
1997
1998 return val;
1999 }
2000
2001 uint64_t kvmppc_get_clockfreq(void)
2002 {
2003 return kvmppc_read_int_cpu_dt("clock-frequency");
2004 }
2005
2006 uint32_t kvmppc_get_vmx(void)
2007 {
2008 return kvmppc_read_int_cpu_dt("ibm,vmx");
2009 }
2010
2011 uint32_t kvmppc_get_dfp(void)
2012 {
2013 return kvmppc_read_int_cpu_dt("ibm,dfp");
2014 }
2015
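/* Fetch KVM's paravirtualization info (hypercall instructions and
 * flags).  Returns 0 on success, 1 if the capability or ioctl is not
 * available. */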
2016 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2017 {
2018 PowerPCCPU *cpu = ppc_env_get_cpu(env);
2019 CPUState *cs = CPU(cpu);
2020
2021 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2022 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2023 return 0;
2024 }
2025
2026 return 1;
2027 }
2028
2029 int kvmppc_get_hasidle(CPUPPCState *env)
2030 {
2031 struct kvm_ppc_pvinfo pvinfo;
2032
2033 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2034 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2035 return 1;
2036 }
2037
2038 return 0;
2039 }
2040
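/* Copy the KVM paravirt hypercall instruction sequence into "buf";
 * callers typically expose it to the guest (e.g. via an
 * "hcall-instructions" device tree property - a note about typical use,
 * not something enforced here).  Returns 0 if KVM supplied the
 * sequence, 1 if the always-failing fallback below was used instead. */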
2041 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2042 {
2043 uint32_t *hc = (uint32_t*)buf;
2044 struct kvm_ppc_pvinfo pvinfo;
2045
2046 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2047 memcpy(buf, pvinfo.hcall, buf_len);
2048 return 0;
2049 }
2050
2051 /*
2052 * Fall back to a hypercall stub that always fails, regardless of endianness:
2053 *
2054 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2055 * li r3, -1
2056 * b .+8 (becomes nop in wrong endian)
2057 * bswap32(li r3, -1)
2058 */
2059
2060 hc[0] = cpu_to_be32(0x08000048);
2061 hc[1] = cpu_to_be32(0x3860ffff);
2062 hc[2] = cpu_to_be32(0x48000008);
2063 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2064
2065 return 1;
2066 }
2067
2068 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2069 {
2070 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2071 }
2072
2073 void kvmppc_enable_logical_ci_hcalls(void)
2074 {
2075 /*
2076 * FIXME: it would be nice to detect the case where a device
2077 * requires the in-kernel implementation of these hcalls but
2078 * the kernel lacks it, and produce a warning when that
2079 * happens.
2080 */
2081 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2082 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2083 }
2084
2085 void kvmppc_enable_set_mode_hcall(void)
2086 {
2087 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2088 }
2089
2090 void kvmppc_enable_clear_ref_mod_hcalls(void)
2091 {
2092 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2093 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2094 }
2095
2096 void kvmppc_set_papr(PowerPCCPU *cpu)
2097 {
2098 CPUState *cs = CPU(cpu);
2099 int ret;
2100
2101 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2102 if (ret) {
2103 error_report("This vCPU type or KVM version does not support PAPR");
2104 exit(1);
2105 }
2106
2107 /* Update the capability flag so we sync the right information
2108 * with kvm */
2109 cap_papr = 1;
2110 }
2111
2112 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2113 {
2114 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2115 }
2116
2117 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2118 {
2119 CPUState *cs = CPU(cpu);
2120 int ret;
2121
2122 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2123 if (ret && mpic_proxy) {
2124 error_report("This KVM version does not support EPR");
2125 exit(1);
2126 }
2127 }
2128
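/* Number of hardware threads per core that KVM exposes to the guest,
 * or 1 if KVM doesn't report it. */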
2129 int kvmppc_smt_threads(void)
2130 {
2131 return cap_ppc_smt ? cap_ppc_smt : 1;
2132 }
2133
2134 #ifdef TARGET_PPC64
2135 off_t kvmppc_alloc_rma(void **rma)
2136 {
2137 off_t size;
2138 int fd;
2139 struct kvm_allocate_rma ret;
2140
2141 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported;
2142 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2143 * not necessary on this hardware;
2144 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware.
2145 *
2146 * FIXME: We should allow the user to force contiguous RMA
2147 * allocation in the cap_ppc_rma==1 case.
2148 */
2149 if (cap_ppc_rma < 2) {
2150 return 0;
2151 }
2152
2153 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2154 if (fd < 0) {
2155 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2156 strerror(errno));
2157 return -1;
2158 }
2159
2160 size = MIN(ret.rma_size, 256ul << 20);
2161
2162 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2163 if (*rma == MAP_FAILED) {
2164 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2165 return -1;
2166 }
2167
2168 return size;
2169 }
2170
2171 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2172 {
2173 struct kvm_ppc_smmu_info info;
2174 long rampagesize, best_page_shift;
2175 int i;
2176
2177 if (cap_ppc_rma >= 2) {
2178 return current_size;
2179 }
2180
2181 /* Find the largest hardware supported page size that's less than
2182 * or equal to the (logical) backing page size of guest RAM */
2183 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2184 rampagesize = getrampagesize();
2185 best_page_shift = 0;
2186
2187 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2188 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2189
2190 if (!sps->page_shift) {
2191 continue;
2192 }
2193
2194 if ((sps->page_shift > best_page_shift)
2195 && ((1UL << sps->page_shift) <= rampagesize)) {
2196 best_page_shift = sps->page_shift;
2197 }
2198 }
2199
2200 return MIN(current_size,
2201 1ULL << (best_page_shift + hash_shift - 7));
2202 }
2203 #endif
2204
2205 bool kvmppc_spapr_use_multitce(void)
2206 {
2207 return cap_spapr_multitce;
2208 }
2209
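/* Try to create an in-kernel TCE table for the given LIOBN and map it
 * into QEMU's address space.  On success the backing fd is stored in
 * *pfd and the mapped table is returned; otherwise *pfd is set to -1
 * and NULL is returned, so the caller can fall back to a userspace
 * table. */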
2210 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2211 bool need_vfio)
2212 {
2213 struct kvm_create_spapr_tce args = {
2214 .liobn = liobn,
2215 .window_size = window_size,
2216 };
2217 long len;
2218 int fd;
2219 void *table;
2220
2221 /* Must set fd to -1 so we don't try to munmap when called for
2222 * destroying the table, which the upper layers -will- do
2223 */
2224 *pfd = -1;
2225 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2226 return NULL;
2227 }
2228
2229 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2230 if (fd < 0) {
2231 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2232 liobn);
2233 return NULL;
2234 }
2235
2236 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2237 /* FIXME: round this up to page size */
2238
2239 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2240 if (table == MAP_FAILED) {
2241 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2242 liobn);
2243 close(fd);
2244 return NULL;
2245 }
2246
2247 *pfd = fd;
2248 return table;
2249 }
2250
2251 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2252 {
2253 long len;
2254
2255 if (fd < 0) {
2256 return -1;
2257 }
2258
2259 len = nb_table * sizeof(uint64_t);
2260 if ((munmap(table, len) < 0) ||
2261 (close(fd) < 0)) {
2262 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2263 strerror(errno));
2264 /* Leak the table */
2265 }
2266
2267 return 0;
2268 }
2269
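/* Ask the kernel to allocate (or reset) the guest hash page table.
 * Returns the hash table order (shift) if the kernel manages the HPT,
 * 0 if the caller should allocate the HPT itself in QEMU, or a
 * negative errno on failure. */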
2270 int kvmppc_reset_htab(int shift_hint)
2271 {
2272 uint32_t shift = shift_hint;
2273
2274 if (!kvm_enabled()) {
2275 /* Full emulation, tell caller to allocate htab itself */
2276 return 0;
2277 }
2278 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2279 int ret;
2280 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2281 if (ret == -ENOTTY) {
2282 /* At least some versions of PR KVM advertise the
2283 * capability, but don't implement the ioctl(). Oops.
2284 * Return 0 so that we allocate the htab in qemu, as is
2285 * correct for PR. */
2286 return 0;
2287 } else if (ret < 0) {
2288 return ret;
2289 }
2290 return shift;
2291 }
2292
2293 /* We have a kernel that predates the htab reset calls.  For PR
2294 * KVM, we need to allocate the htab ourselves; an HV KVM of
2295 * this era has already allocated a fixed 16MB hash table. */
2296 if (kvmppc_is_pr(kvm_state)) {
2297 /* PR - tell caller to allocate htab */
2298 return 0;
2299 } else {
2300 /* HV - assume 16MB kernel allocated htab */
2301 return 24;
2302 }
2303 }
2304
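/* Read the host's Processor Version Register */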
2305 static inline uint32_t mfpvr(void)
2306 {
2307 uint32_t pvr;
2308
2309 asm ("mfpvr %0"
2310 : "=r"(pvr));
2311 return pvr;
2312 }
2313
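/* Set or clear the given instruction-availability flag bits in *word */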
2314 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2315 {
2316 if (on) {
2317 *word |= flags;
2318 } else {
2319 *word &= ~flags;
2320 }
2321 }
2322
2323 static void kvmppc_host_cpu_initfn(Object *obj)
2324 {
2325 assert(kvm_enabled());
2326 }
2327
2328 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2329 {
2330 DeviceClass *dc = DEVICE_CLASS(oc);
2331 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2332 uint32_t vmx = kvmppc_get_vmx();
2333 uint32_t dfp = kvmppc_get_dfp();
2334 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2335 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2336
2337 /* Now fix up the class with information we can query from the host */
2338 pcc->pvr = mfpvr();
2339
2340 if (vmx != -1) {
2341 /* Only override when we know what the host supports */
2342 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2343 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2344 }
2345 if (dfp != -1) {
2346 /* Only override when we know what the host supports */
2347 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2348 }
2349
2350 if (dcache_size != -1) {
2351 pcc->l1_dcache_size = dcache_size;
2352 }
2353
2354 if (icache_size != -1) {
2355 pcc->l1_icache_size = icache_size;
2356 }
2357
2358 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2359 dc->cannot_destroy_with_object_finalize_yet = true;
2360 }
2361
2362 bool kvmppc_has_cap_epr(void)
2363 {
2364 return cap_epr;
2365 }
2366
2367 bool kvmppc_has_cap_htab_fd(void)
2368 {
2369 return cap_htab_fd;
2370 }
2371
2372 bool kvmppc_has_cap_fixup_hcalls(void)
2373 {
2374 return cap_fixup_hcalls;
2375 }
2376
2377 bool kvmppc_has_cap_htm(void)
2378 {
2379 return cap_htm;
2380 }
2381
2382 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2383 {
2384 ObjectClass *oc = OBJECT_CLASS(pcc);
2385
2386 while (oc && !object_class_is_abstract(oc)) {
2387 oc = object_class_get_parent(oc);
2388 }
2389 assert(oc);
2390
2391 return POWERPC_CPU_CLASS(oc);
2392 }
2393
2394 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2395 {
2396 uint32_t host_pvr = mfpvr();
2397 PowerPCCPUClass *pvr_pcc;
2398
2399 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2400 if (pvr_pcc == NULL) {
2401 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2402 }
2403
2404 return pvr_pcc;
2405 }
2406
2407 static int kvm_ppc_register_host_cpu_type(void)
2408 {
2409 TypeInfo type_info = {
2410 .name = TYPE_HOST_POWERPC_CPU,
2411 .instance_init = kvmppc_host_cpu_initfn,
2412 .class_init = kvmppc_host_cpu_class_init,
2413 };
2414 PowerPCCPUClass *pvr_pcc;
2415 DeviceClass *dc;
2416 int i;
2417
2418 pvr_pcc = kvm_ppc_get_host_cpu_class();
2419 if (pvr_pcc == NULL) {
2420 return -1;
2421 }
2422 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2423 type_register(&type_info);
2424
2425 #if defined(TARGET_PPC64)
2426 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2427 type_info.parent = TYPE_SPAPR_CPU_CORE;
2428 type_info.instance_size = sizeof(sPAPRCPUCore);
2429 type_info.instance_init = NULL;
2430 type_info.class_init = spapr_cpu_core_class_init;
2431 type_info.class_data = (void *) "host";
2432 type_register(&type_info);
2433 g_free((void *)type_info.name);
2434 #endif
2435
2436 /*
2437 * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2438 * we want "POWER8" to be a "family" alias that points to the current
2439 * host CPU type, too)
2440 */
2441 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2442 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2443 if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2444 ObjectClass *oc = OBJECT_CLASS(pvr_pcc);
2445 char *suffix;
2446
2447 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2448 suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
2449 if (suffix) {
2450 *suffix = 0;
2451 }
2452 ppc_cpu_aliases[i].oc = oc;
2453 break;
2454 }
2455 }
2456
2457 return 0;
2458 }
2459
2460 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2461 {
2462 struct kvm_rtas_token_args args = {
2463 .token = token,
2464 };
2465
2466 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2467 return -ENOENT;
2468 }
2469
2470 strncpy(args.name, function, sizeof(args.name));
2471
2472 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2473 }
2474
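/* Get a file descriptor for streaming the guest hash page table out of
 * the kernel (write == false, outgoing migration) or into it
 * (write == true, incoming migration). */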
2475 int kvmppc_get_htab_fd(bool write)
2476 {
2477 struct kvm_get_htab_fd s = {
2478 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2479 .start_index = 0,
2480 };
2481
2482 if (!cap_htab_fd) {
2483 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2484 return -1;
2485 }
2486
2487 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2488 }
2489
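/* Stream hash page table entries from the kernel's htab fd into the
 * migration stream as (index, n_valid, n_invalid, PTEs) chunks.
 * Returns 1 once the whole table has been read, 0 if we stopped early
 * because max_ns expired, or a negative value on a read error. */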
2490 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2491 {
2492 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2493 uint8_t buf[bufsize];
2494 ssize_t rc;
2495
2496 do {
2497 rc = read(fd, buf, bufsize);
2498 if (rc < 0) {
2499 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2500 strerror(errno));
2501 return rc;
2502 } else if (rc) {
2503 uint8_t *buffer = buf;
2504 ssize_t n = rc;
2505 while (n) {
2506 struct kvm_get_htab_header *head =
2507 (struct kvm_get_htab_header *) buffer;
2508 size_t chunksize = sizeof(*head) +
2509 HASH_PTE_SIZE_64 * head->n_valid;
2510
2511 qemu_put_be32(f, head->index);
2512 qemu_put_be16(f, head->n_valid);
2513 qemu_put_be16(f, head->n_invalid);
2514 qemu_put_buffer(f, (void *)(head + 1),
2515 HASH_PTE_SIZE_64 * head->n_valid);
2516
2517 buffer += chunksize;
2518 n -= chunksize;
2519 }
2520 }
2521 } while ((rc != 0)
2522 && ((max_ns < 0)
2523 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2524
2525 return (rc == 0) ? 1 : 0;
2526 }
2527
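/* Write one (index, n_valid, n_invalid) chunk from the migration
 * stream back into the kernel hash page table via the htab fd.
 * Returns 0 on success, a negative value on error. */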
2528 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2529 uint16_t n_valid, uint16_t n_invalid)
2530 {
2531 struct kvm_get_htab_header *buf;
2532 size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2533 ssize_t rc;
2534
2535 buf = alloca(chunksize);
2536 buf->index = index;
2537 buf->n_valid = n_valid;
2538 buf->n_invalid = n_invalid;
2539
2540 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2541
2542 rc = write(fd, buf, chunksize);
2543 if (rc < 0) {
2544 fprintf(stderr, "Error writing KVM hash table: %s\n",
2545 strerror(errno));
2546 return rc;
2547 }
2548 if (rc != chunksize) {
2549 /* We should never get a short write on a single chunk */
2550 fprintf(stderr, "Short write, restoring KVM hash table\n");
2551 return -1;
2552 }
2553 return 0;
2554 }
2555
2556 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2557 {
2558 return true;
2559 }
2560
2561 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2562 {
2563 return 1;
2564 }
2565
2566 int kvm_arch_on_sigbus(int code, void *addr)
2567 {
2568 return 1;
2569 }
2570
2571 void kvm_arch_init_irq_routing(KVMState *s)
2572 {
2573 }
2574
2575 struct kvm_get_htab_buf {
2576 struct kvm_get_htab_header header;
2577 /*
2578 * We require one extra byte for read
2579 */
2580 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2581 };
2582
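/* Read one group of hash PTEs starting at pte_index from the kernel
 * HPT.  Returns a token (really a pointer to the HPTE data) that must
 * be released with kvmppc_hash64_free_pteg(), or 0 on failure. */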
2583 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2584 {
2585 int htab_fd;
2586 struct kvm_get_htab_fd ghf;
2587 struct kvm_get_htab_buf *hpte_buf;
2588
2589 ghf.flags = 0;
2590 ghf.start_index = pte_index;
2591 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2592 if (htab_fd < 0) {
2593 goto error_out;
2594 }
2595
2596 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2597 /*
2598 * Read the hpte group
2599 */
2600 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2601 goto out_close;
2602 }
2603
2604 close(htab_fd);
2605 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2606
2607 out_close:
2608 g_free(hpte_buf);
2609 close(htab_fd);
2610 error_out:
2611 return 0;
2612 }
2613
2614 void kvmppc_hash64_free_pteg(uint64_t token)
2615 {
2616 struct kvm_get_htab_buf *htab_buf;
2617
2618 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2619 hpte);
2620 g_free(htab_buf);
2621 return;
2622 }
2623
2624 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2625 target_ulong pte0, target_ulong pte1)
2626 {
2627 int htab_fd;
2628 struct kvm_get_htab_fd ghf;
2629 struct kvm_get_htab_buf hpte_buf;
2630
2631 ghf.flags = 0;
2632 ghf.start_index = 0; /* Ignored */
2633 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2634 if (htab_fd < 0) {
2635 goto error_out;
2636 }
2637
2638 hpte_buf.header.n_valid = 1;
2639 hpte_buf.header.n_invalid = 0;
2640 hpte_buf.header.index = pte_index;
2641 hpte_buf.hpte[0] = pte0;
2642 hpte_buf.hpte[1] = pte1;
2643 /*
2644 * Write the hpte entry.
2645 * CAUTION: write() has the warn_unused_result attribute. Hence we
2646 * need to check the return value, even though we take no action on it.
2647 */
2648 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2649 goto out_close;
2650 }
2651
2652 out_close:
2653 close(htab_fd);
2654 return;
2655
2656 error_out:
2657 return;
2658 }
2659
2660 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2661 uint64_t address, uint32_t data, PCIDevice *dev)
2662 {
2663 return 0;
2664 }
2665
2666 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2667 int vector, PCIDevice *dev)
2668 {
2669 return 0;
2670 }
2671
2672 int kvm_arch_release_virq_post(int virq)
2673 {
2674 return 0;
2675 }
2676
2677 int kvm_arch_msi_data_to_gsi(uint32_t data)
2678 {
2679 return data & 0xffff;
2680 }
2681
2682 int kvmppc_enable_hwrng(void)
2683 {
2684 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2685 return -1;
2686 }
2687
2688 return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2689 }