1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21
22 #include <linux/kvm.h>
23
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "cpu-models.h"
28 #include "qemu/timer.h"
29 #include "sysemu/sysemu.h"
30 #include "sysemu/hw_accel.h"
31 #include "sysemu/numa.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "sysemu/hostmem.h"
47 #include "qemu/cutils.h"
48 #if defined(TARGET_PPC64)
49 #include "hw/ppc/spapr_cpu_core.h"
50 #endif
51
52 //#define DEBUG_KVM
53
54 #ifdef DEBUG_KVM
55 #define DPRINTF(fmt, ...) \
56 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
57 #else
58 #define DPRINTF(fmt, ...) \
59 do { } while (0)
60 #endif
61
62 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
63
64 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
65 KVM_CAP_LAST_INFO
66 };
67
68 static int cap_interrupt_unset = false;
69 static int cap_interrupt_level = false;
70 static int cap_segstate;
71 static int cap_booke_sregs;
72 static int cap_ppc_smt;
73 static int cap_ppc_rma;
74 static int cap_spapr_tce;
75 static int cap_spapr_multitce;
76 static int cap_spapr_vfio;
77 static int cap_hior;
78 static int cap_one_reg;
79 static int cap_epr;
80 static int cap_ppc_watchdog;
81 static int cap_papr;
82 static int cap_htab_fd;
83 static int cap_fixup_hcalls;
84 static int cap_htm; /* Hardware transactional memory support */
85
86 static uint32_t debug_inst_opcode;
87
88 /* XXX We have a race condition where we actually have a level triggered
89 * interrupt, but the infrastructure can't expose that yet, so the guest
90 * takes but ignores it, goes to sleep and never gets notified that there's
91 * still an interrupt pending.
92 *
93 * As a quick workaround, let's just wake up again 20 ms after we injected
94 * an interrupt. That way we can ensure that we're always reinjecting
95 * interrupts in case the guest swallowed them.
96 */
97 static QEMUTimer *idle_timer;
98
99 static void kvm_kick_cpu(void *opaque)
100 {
101 PowerPCCPU *cpu = opaque;
102
103 qemu_cpu_kick(CPU(cpu));
104 }
105
106 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
107 * should only be used for fallback tests - generally we should use
108 * explicit capabilities for the features we want, rather than
109 * assuming what is/isn't available depending on the KVM variant. */
110 static bool kvmppc_is_pr(KVMState *ks)
111 {
112 /* Assume KVM-PR if the GET_PVINFO capability is available */
113 return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
114 }
115
116 static int kvm_ppc_register_host_cpu_type(void);
117
118 int kvm_arch_init(MachineState *ms, KVMState *s)
119 {
120 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
121 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
122 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
123 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
124 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
125 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
126 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
127 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
128 cap_spapr_vfio = false;
129 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
130 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
131 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
132 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
133 /* Note: we don't set cap_papr here, because this capability is
134 * only activated later, by kvmppc_set_papr() */
135 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
136 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
137 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
138
139 if (!cap_interrupt_level) {
140 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
141 "VM to stall at times!\n");
142 }
143
144 kvm_ppc_register_host_cpu_type();
145
146 return 0;
147 }
148
149 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
150 {
151 return 0;
152 }
153
154 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
155 {
156 CPUPPCState *cenv = &cpu->env;
157 CPUState *cs = CPU(cpu);
158 struct kvm_sregs sregs;
159 int ret;
160
161 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
162 /* What we're really trying to say is "if we're on BookE, we use
163 the native PVR for now". This is the only sane way to check
164 it though, so we may mislead users into thinking they can run
165 BookE guests on BookS. Let's hope nobody tries :) */
166 return 0;
167 } else {
168 if (!cap_segstate) {
169 fprintf(stderr, "kvm error: missing PVR setting capability\n");
170 return -ENOSYS;
171 }
172 }
173
174 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
175 if (ret) {
176 return ret;
177 }
178
179 sregs.pvr = cenv->spr[SPR_PVR];
180 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
181 }
182
183 /* Set up a shared TLB array with KVM */
184 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
185 {
186 CPUPPCState *env = &cpu->env;
187 CPUState *cs = CPU(cpu);
188 struct kvm_book3e_206_tlb_params params = {};
189 struct kvm_config_tlb cfg = {};
190 unsigned int entries = 0;
191 int ret, i;
192
193 if (!kvm_enabled() ||
194 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
195 return 0;
196 }
197
198 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
199
200 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
201 params.tlb_sizes[i] = booke206_tlb_size(env, i);
202 params.tlb_ways[i] = booke206_tlb_ways(env, i);
203 entries += params.tlb_sizes[i];
204 }
205
206 assert(entries == env->nb_tlb);
207 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
208
209 env->tlb_dirty = true;
210
211 cfg.array = (uintptr_t)env->tlb.tlbm;
212 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
213 cfg.params = (uintptr_t)&params;
214 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
215
216 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
217 if (ret < 0) {
218 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
219 __func__, strerror(-ret));
220 return ret;
221 }
222
223 env->kvm_sw_tlb = true;
224 return 0;
225 }
226
227
228 #if defined(TARGET_PPC64)
229 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
230 struct kvm_ppc_smmu_info *info)
231 {
232 CPUPPCState *env = &cpu->env;
233 CPUState *cs = CPU(cpu);
234
235 memset(info, 0, sizeof(*info));
236
237 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
238 * we need to "guess" what the supported page sizes are.
239 *
240 * For that to work we make a few assumptions:
241 *
242 * - Check whether we are running "PR" KVM which only supports 4K
243 * and 16M pages, but supports them regardless of the backing
244 * store characteristics. We also don't support 1T segments.
245 *
246 * This is safe as if HV KVM ever supports that capability or PR
247 * KVM grows support for more page/segment sizes, those versions
248 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
249 * will not hit this fallback
250 *
251 * - Else we are running HV KVM. This means we only support page
252 * sizes that fit in the backing store. Additionally we only
253 * advertise 64K pages if the processor is ARCH 2.06 and we assume
254 * P7 encodings for the SLB and hash table. Here too, we assume
255 * support for any newer processor will mean a kernel that
256 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
257 * this fallback.
258 */
259 if (kvmppc_is_pr(cs->kvm_state)) {
260 /* No flags */
261 info->flags = 0;
262 info->slb_size = 64;
263
264 /* Standard 4k base page size segment */
265 info->sps[0].page_shift = 12;
266 info->sps[0].slb_enc = 0;
267 info->sps[0].enc[0].page_shift = 12;
268 info->sps[0].enc[0].pte_enc = 0;
269
270 /* Standard 16M large page size segment */
271 info->sps[1].page_shift = 24;
272 info->sps[1].slb_enc = SLB_VSID_L;
273 info->sps[1].enc[0].page_shift = 24;
274 info->sps[1].enc[0].pte_enc = 0;
275 } else {
276 int i = 0;
277
278 /* HV KVM has backing store size restrictions */
279 info->flags = KVM_PPC_PAGE_SIZES_REAL;
280
281 if (env->mmu_model & POWERPC_MMU_1TSEG) {
282 info->flags |= KVM_PPC_1T_SEGMENTS;
283 }
284
285 if (env->mmu_model == POWERPC_MMU_2_06 ||
286 env->mmu_model == POWERPC_MMU_2_07) {
287 info->slb_size = 32;
288 } else {
289 info->slb_size = 64;
290 }
291
292 /* Standard 4k base page size segment */
293 info->sps[i].page_shift = 12;
294 info->sps[i].slb_enc = 0;
295 info->sps[i].enc[0].page_shift = 12;
296 info->sps[i].enc[0].pte_enc = 0;
297 i++;
298
299 /* 64K on MMU 2.06 and later */
300 if (env->mmu_model == POWERPC_MMU_2_06 ||
301 env->mmu_model == POWERPC_MMU_2_07) {
302 info->sps[i].page_shift = 16;
303 info->sps[i].slb_enc = 0x110;
304 info->sps[i].enc[0].page_shift = 16;
305 info->sps[i].enc[0].pte_enc = 1;
306 i++;
307 }
308
309 /* Standard 16M large page size segment */
310 info->sps[i].page_shift = 24;
311 info->sps[i].slb_enc = SLB_VSID_L;
312 info->sps[i].enc[0].page_shift = 24;
313 info->sps[i].enc[0].pte_enc = 0;
314 }
315 }
316
317 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
318 {
319 CPUState *cs = CPU(cpu);
320 int ret;
321
322 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
323 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
324 if (ret == 0) {
325 return;
326 }
327 }
328
329 kvm_get_fallback_smmu_info(cpu, info);
330 }
331
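/* Illustrative example for gethugepagesize() below: for a mem_path on a
 * hugetlbfs mount created with 16 MiB pages, statfs() reports
 * f_bsize == 0x1000000, which is returned directly; for a path on an
 * ordinary filesystem this falls back to getpagesize() (typically 4 KiB,
 * or 64 KiB on many ppc64 hosts). */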
332 static long gethugepagesize(const char *mem_path)
333 {
334 struct statfs fs;
335 int ret;
336
337 do {
338 ret = statfs(mem_path, &fs);
339 } while (ret != 0 && errno == EINTR);
340
341 if (ret != 0) {
342 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
343 strerror(errno));
344 exit(1);
345 }
346
347 #define HUGETLBFS_MAGIC 0x958458f6
348
349 if (fs.f_type != HUGETLBFS_MAGIC) {
350 /* Explicit mempath, but it's ordinary pages */
351 return getpagesize();
352 }
353
354 /* It's a hugepage, return the huge page size */
355 return fs.f_bsize;
356 }
357
358 /*
359 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
360 * may or may not name the same files / on the same filesystem now as
361 * when we actually open and map them. Iterate over the file
362 * descriptors instead, and use qemu_fd_getpagesize().
363 */
364 static int find_max_supported_pagesize(Object *obj, void *opaque)
365 {
366 char *mem_path;
367 long *hpsize_min = opaque;
368
369 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
370 mem_path = object_property_get_str(obj, "mem-path", NULL);
371 if (mem_path) {
372 long hpsize = gethugepagesize(mem_path);
373 if (hpsize < *hpsize_min) {
374 *hpsize_min = hpsize;
375 }
376 } else {
377 *hpsize_min = getpagesize();
378 }
379 }
380
381 return 0;
382 }
383
384 static long getrampagesize(void)
385 {
386 long hpsize = LONG_MAX;
387 long mainrampagesize;
388 Object *memdev_root;
389
390 if (mem_path) {
391 mainrampagesize = gethugepagesize(mem_path);
392 } else {
393 mainrampagesize = getpagesize();
394 }
395
396 /* it's possible we have memory-backend objects with
397 * hugepage-backed RAM. these may get mapped into system
398 * address space via -numa parameters or memory hotplug
399 * hooks. we want to take these into account, but we
400 * also want to make sure these supported hugepage
401 * sizes are applicable across the entire range of memory
402 * we may boot from, so we take the min across all
403 * backends, and assume normal pages in cases where a
404 * backend isn't backed by hugepages.
405 */
406 memdev_root = object_resolve_path("/objects", NULL);
407 if (memdev_root) {
408 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
409 }
410 if (hpsize == LONG_MAX) {
411 /* No additional memory regions found ==> Report main RAM page size */
412 return mainrampagesize;
413 }
414
415 /* If NUMA is disabled or the NUMA nodes are not backed with a
416 * memory-backend, then there is at least one node using "normal" RAM,
417 * so if its page size is smaller we have got to report that size instead.
418 */
419 if (hpsize > mainrampagesize &&
420 (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
421 static bool warned;
422 if (!warned) {
423 error_report("Huge page support disabled (n/a for main memory).");
424 warned = true;
425 }
426 return mainrampagesize;
427 }
428
429 return hpsize;
430 }
431
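/* Worked example for kvm_valid_page_size() below (hypothetical numbers):
 * with KVM_PPC_PAGE_SIZES_REAL set and RAM backed by 64 KiB pages
 * (rampgsize == 65536), a 64 KiB page size (shift 16) is accepted since
 * 1 << 16 <= 65536, while a 16 MiB page size (shift 24) is rejected.
 * Without the flag every size is accepted. */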
432 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
433 {
434 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
435 return true;
436 }
437
438 return (1ul << shift) <= rampgsize;
439 }
440
441 static long max_cpu_page_size;
442
443 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
444 {
445 static struct kvm_ppc_smmu_info smmu_info;
446 static bool has_smmu_info;
447 CPUPPCState *env = &cpu->env;
448 int iq, ik, jq, jk;
449 bool has_64k_pages = false;
450
451 /* We only handle page sizes for 64-bit server guests for now */
452 if (!(env->mmu_model & POWERPC_MMU_64)) {
453 return;
454 }
455
456 /* Collect MMU info from kernel if not already */
457 if (!has_smmu_info) {
458 kvm_get_smmu_info(cpu, &smmu_info);
459 has_smmu_info = true;
460 }
461
462 if (!max_cpu_page_size) {
463 max_cpu_page_size = getrampagesize();
464 }
465
466 /* Convert to QEMU form */
467 memset(&env->sps, 0, sizeof(env->sps));
468
469 /* If we have HV KVM, we need to forbid CI large pages if our
470 * host page size is smaller than 64K.
471 */
472 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
473 env->ci_large_pages = getpagesize() >= 0x10000;
474 }
475
476 /*
477 * XXX This loop should be an entry wide AND of the capabilities that
478 * the selected CPU has with the capabilities that KVM supports.
479 */
480 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
481 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
482 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
483
484 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
485 ksps->page_shift)) {
486 continue;
487 }
488 qsps->page_shift = ksps->page_shift;
489 qsps->slb_enc = ksps->slb_enc;
490 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
491 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
492 ksps->enc[jk].page_shift)) {
493 continue;
494 }
495 if (ksps->enc[jk].page_shift == 16) {
496 has_64k_pages = true;
497 }
498 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
499 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
500 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
501 break;
502 }
503 }
504 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
505 break;
506 }
507 }
508 env->slb_nr = smmu_info.slb_size;
509 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
510 env->mmu_model &= ~POWERPC_MMU_1TSEG;
511 }
512 if (!has_64k_pages) {
513 env->mmu_model &= ~POWERPC_MMU_64K;
514 }
515 }
516
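/* Check that the page size backing this memory backend is at least
 * max_cpu_page_size, the RAM page size used above when advertising page
 * sizes to the guest; smaller backing pages would break page sizes the
 * guest has already been told about. */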
517 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
518 {
519 Object *mem_obj = object_resolve_path(obj_path, NULL);
520 char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
521 long pagesize;
522
523 if (mempath) {
524 pagesize = gethugepagesize(mempath);
525 } else {
526 pagesize = getpagesize();
527 }
528
529 return pagesize >= max_cpu_page_size;
530 }
531
532 #else /* defined (TARGET_PPC64) */
533
534 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
535 {
536 }
537
538 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
539 {
540 return true;
541 }
542
543 #endif /* !defined (TARGET_PPC64) */
544
545 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
546 {
547 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
548 }
549
550 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
551 * book3s supports only 1 watchpoint, so an array size
552 * of 4 is sufficient for now.
553 */
554 #define MAX_HW_BKPTS 4
555
556 static struct HWBreakpoint {
557 target_ulong addr;
558 int type;
559 } hw_debug_points[MAX_HW_BKPTS];
560
561 static CPUWatchpoint hw_watchpoint;
562
563 /* By default there are no breakpoints or watchpoints supported */
564 static int max_hw_breakpoint;
565 static int max_hw_watchpoint;
566 static int nb_hw_breakpoint;
567 static int nb_hw_watchpoint;
568
569 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
570 {
571 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
572 max_hw_breakpoint = 2;
573 max_hw_watchpoint = 2;
574 }
575
576 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
577 fprintf(stderr, "Error initializing h/w breakpoints\n");
578 return;
579 }
580 }
581
582 int kvm_arch_init_vcpu(CPUState *cs)
583 {
584 PowerPCCPU *cpu = POWERPC_CPU(cs);
585 CPUPPCState *cenv = &cpu->env;
586 int ret;
587
588 /* Gather server mmu info from KVM and update the CPU state */
589 kvm_fixup_page_sizes(cpu);
590
591 /* Synchronize sregs with kvm */
592 ret = kvm_arch_sync_sregs(cpu);
593 if (ret) {
594 if (ret == -EINVAL) {
595 error_report("Register sync failed... If you're using kvm-hv.ko,"
596 " only \"-cpu host\" is possible");
597 }
598 return ret;
599 }
600
601 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
602
603 switch (cenv->mmu_model) {
604 case POWERPC_MMU_BOOKE206:
605 /* This target supports access to KVM's guest TLB */
606 ret = kvm_booke206_tlb_init(cpu);
607 break;
608 case POWERPC_MMU_2_07:
609 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
610 /* KVM-HV has transactional memory on POWER8 even without the
611 * KVM_CAP_PPC_HTM extension, so enable it here instead. */
612 cap_htm = true;
613 }
614 break;
615 default:
616 break;
617 }
618
619 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
620 kvmppc_hw_debug_points_init(cenv);
621
622 return ret;
623 }
624
625 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
626 {
627 CPUPPCState *env = &cpu->env;
628 CPUState *cs = CPU(cpu);
629 struct kvm_dirty_tlb dirty_tlb;
630 unsigned char *bitmap;
631 int ret;
632
633 if (!env->kvm_sw_tlb) {
634 return;
635 }
636
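    /* Flag every entry dirty (all bitmap bits set) so KVM resynchronizes
     * the entire shared TLB array from userspace. */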
637 bitmap = g_malloc((env->nb_tlb + 7) / 8);
638 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
639
640 dirty_tlb.bitmap = (uintptr_t)bitmap;
641 dirty_tlb.num_dirty = env->nb_tlb;
642
643 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
644 if (ret) {
645 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
646 __func__, strerror(-ret));
647 }
648
649 g_free(bitmap);
650 }
651
652 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
653 {
654 PowerPCCPU *cpu = POWERPC_CPU(cs);
655 CPUPPCState *env = &cpu->env;
656 union {
657 uint32_t u32;
658 uint64_t u64;
659 } val;
660 struct kvm_one_reg reg = {
661 .id = id,
662 .addr = (uintptr_t) &val,
663 };
664 int ret;
665
666 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
667 if (ret != 0) {
668 trace_kvm_failed_spr_get(spr, strerror(errno));
669 } else {
670 switch (id & KVM_REG_SIZE_MASK) {
671 case KVM_REG_SIZE_U32:
672 env->spr[spr] = val.u32;
673 break;
674
675 case KVM_REG_SIZE_U64:
676 env->spr[spr] = val.u64;
677 break;
678
679 default:
680 /* Don't handle this size yet */
681 abort();
682 }
683 }
684 }
685
686 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
687 {
688 PowerPCCPU *cpu = POWERPC_CPU(cs);
689 CPUPPCState *env = &cpu->env;
690 union {
691 uint32_t u32;
692 uint64_t u64;
693 } val;
694 struct kvm_one_reg reg = {
695 .id = id,
696 .addr = (uintptr_t) &val,
697 };
698 int ret;
699
700 switch (id & KVM_REG_SIZE_MASK) {
701 case KVM_REG_SIZE_U32:
702 val.u32 = env->spr[spr];
703 break;
704
705 case KVM_REG_SIZE_U64:
706 val.u64 = env->spr[spr];
707 break;
708
709 default:
710 /* Don't handle this size yet */
711 abort();
712 }
713
714 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
715 if (ret != 0) {
716 trace_kvm_failed_spr_set(spr, strerror(errno));
717 }
718 }
719
720 static int kvm_put_fp(CPUState *cs)
721 {
722 PowerPCCPU *cpu = POWERPC_CPU(cs);
723 CPUPPCState *env = &cpu->env;
724 struct kvm_one_reg reg;
725 int i;
726 int ret;
727
728 if (env->insns_flags & PPC_FLOAT) {
729 uint64_t fpscr = env->fpscr;
730 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
731
732 reg.id = KVM_REG_PPC_FPSCR;
733 reg.addr = (uintptr_t)&fpscr;
734 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
735 if (ret < 0) {
736 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
737 return ret;
738 }
739
740 for (i = 0; i < 32; i++) {
741 uint64_t vsr[2];
742
743 #ifdef HOST_WORDS_BIGENDIAN
744 vsr[0] = float64_val(env->fpr[i]);
745 vsr[1] = env->vsr[i];
746 #else
747 vsr[0] = env->vsr[i];
748 vsr[1] = float64_val(env->fpr[i]);
749 #endif
750 reg.addr = (uintptr_t) &vsr;
751 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
752
753 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
754 if (ret < 0) {
755 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
756 i, strerror(errno));
757 return ret;
758 }
759 }
760 }
761
762 if (env->insns_flags & PPC_ALTIVEC) {
763 reg.id = KVM_REG_PPC_VSCR;
764 reg.addr = (uintptr_t)&env->vscr;
765 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
766 if (ret < 0) {
767 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
768 return ret;
769 }
770
771 for (i = 0; i < 32; i++) {
772 reg.id = KVM_REG_PPC_VR(i);
773 reg.addr = (uintptr_t)&env->avr[i];
774 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
775 if (ret < 0) {
776 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
777 return ret;
778 }
779 }
780 }
781
782 return 0;
783 }
784
785 static int kvm_get_fp(CPUState *cs)
786 {
787 PowerPCCPU *cpu = POWERPC_CPU(cs);
788 CPUPPCState *env = &cpu->env;
789 struct kvm_one_reg reg;
790 int i;
791 int ret;
792
793 if (env->insns_flags & PPC_FLOAT) {
794 uint64_t fpscr;
795 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
796
797 reg.id = KVM_REG_PPC_FPSCR;
798 reg.addr = (uintptr_t)&fpscr;
799 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
800 if (ret < 0) {
801 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
802 return ret;
803 } else {
804 env->fpscr = fpscr;
805 }
806
807 for (i = 0; i < 32; i++) {
808 uint64_t vsr[2];
809
810 reg.addr = (uintptr_t) &vsr;
811 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
812
813 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
814 if (ret < 0) {
815 DPRINTF("Unable to get %s%d from KVM: %s\n",
816 vsx ? "VSR" : "FPR", i, strerror(errno));
817 return ret;
818 } else {
819 #ifdef HOST_WORDS_BIGENDIAN
820 env->fpr[i] = vsr[0];
821 if (vsx) {
822 env->vsr[i] = vsr[1];
823 }
824 #else
825 env->fpr[i] = vsr[1];
826 if (vsx) {
827 env->vsr[i] = vsr[0];
828 }
829 #endif
830 }
831 }
832 }
833
834 if (env->insns_flags & PPC_ALTIVEC) {
835 reg.id = KVM_REG_PPC_VSCR;
836 reg.addr = (uintptr_t)&env->vscr;
837 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
838 if (ret < 0) {
839 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
840 return ret;
841 }
842
843 for (i = 0; i < 32; i++) {
844 reg.id = KVM_REG_PPC_VR(i);
845 reg.addr = (uintptr_t)&env->avr[i];
846 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
847 if (ret < 0) {
848 DPRINTF("Unable to get VR%d from KVM: %s\n",
849 i, strerror(errno));
850 return ret;
851 }
852 }
853 }
854
855 return 0;
856 }
857
858 #if defined(TARGET_PPC64)
859 static int kvm_get_vpa(CPUState *cs)
860 {
861 PowerPCCPU *cpu = POWERPC_CPU(cs);
862 CPUPPCState *env = &cpu->env;
863 struct kvm_one_reg reg;
864 int ret;
865
866 reg.id = KVM_REG_PPC_VPA_ADDR;
867 reg.addr = (uintptr_t)&env->vpa_addr;
868 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
869 if (ret < 0) {
870 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
871 return ret;
872 }
873
874 assert((uintptr_t)&env->slb_shadow_size
875 == ((uintptr_t)&env->slb_shadow_addr + 8));
876 reg.id = KVM_REG_PPC_VPA_SLB;
877 reg.addr = (uintptr_t)&env->slb_shadow_addr;
878 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
879 if (ret < 0) {
880 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
881 strerror(errno));
882 return ret;
883 }
884
885 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
886 reg.id = KVM_REG_PPC_VPA_DTL;
887 reg.addr = (uintptr_t)&env->dtl_addr;
888 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
889 if (ret < 0) {
890 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
891 strerror(errno));
892 return ret;
893 }
894
895 return 0;
896 }
897
898 static int kvm_put_vpa(CPUState *cs)
899 {
900 PowerPCCPU *cpu = POWERPC_CPU(cs);
901 CPUPPCState *env = &cpu->env;
902 struct kvm_one_reg reg;
903 int ret;
904
905 /* SLB shadow or DTL can't be registered unless a master VPA is
906 * registered. That means when restoring state, if a VPA *is*
907 * registered, we need to set that up first. If not, we need to
908 * deregister the others before deregistering the master VPA */
909 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
910
911 if (env->vpa_addr) {
912 reg.id = KVM_REG_PPC_VPA_ADDR;
913 reg.addr = (uintptr_t)&env->vpa_addr;
914 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
915 if (ret < 0) {
916 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
917 return ret;
918 }
919 }
920
921 assert((uintptr_t)&env->slb_shadow_size
922 == ((uintptr_t)&env->slb_shadow_addr + 8));
923 reg.id = KVM_REG_PPC_VPA_SLB;
924 reg.addr = (uintptr_t)&env->slb_shadow_addr;
925 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
926 if (ret < 0) {
927 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
928 return ret;
929 }
930
931 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
932 reg.id = KVM_REG_PPC_VPA_DTL;
933 reg.addr = (uintptr_t)&env->dtl_addr;
934 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
935 if (ret < 0) {
936 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
937 strerror(errno));
938 return ret;
939 }
940
941 if (!env->vpa_addr) {
942 reg.id = KVM_REG_PPC_VPA_ADDR;
943 reg.addr = (uintptr_t)&env->vpa_addr;
944 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
945 if (ret < 0) {
946 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
947 return ret;
948 }
949 }
950
951 return 0;
952 }
953 #endif /* TARGET_PPC64 */
954
955 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
956 {
957 CPUPPCState *env = &cpu->env;
958 struct kvm_sregs sregs;
959 int i;
960
961 sregs.pvr = env->spr[SPR_PVR];
962
963 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
964
965 /* Sync SLB */
966 #ifdef TARGET_PPC64
967 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
968 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
969 if (env->slb[i].esid & SLB_ESID_V) {
970 sregs.u.s.ppc64.slb[i].slbe |= i;
971 }
972 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
973 }
974 #endif
975
976 /* Sync SRs */
977 for (i = 0; i < 16; i++) {
978 sregs.u.s.ppc32.sr[i] = env->sr[i];
979 }
980
981 /* Sync BATs */
982 for (i = 0; i < 8; i++) {
983 /* Beware. We have to swap upper and lower bits here */
984 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
985 | env->DBAT[1][i];
986 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
987 | env->IBAT[1][i];
988 }
989
990 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
991 }
992
993 int kvm_arch_put_registers(CPUState *cs, int level)
994 {
995 PowerPCCPU *cpu = POWERPC_CPU(cs);
996 CPUPPCState *env = &cpu->env;
997 struct kvm_regs regs;
998 int ret;
999 int i;
1000
1001 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1002 if (ret < 0) {
1003 return ret;
1004 }
1005
1006 regs.ctr = env->ctr;
1007 regs.lr = env->lr;
1008 regs.xer = cpu_read_xer(env);
1009 regs.msr = env->msr;
1010 regs.pc = env->nip;
1011
1012 regs.srr0 = env->spr[SPR_SRR0];
1013 regs.srr1 = env->spr[SPR_SRR1];
1014
1015 regs.sprg0 = env->spr[SPR_SPRG0];
1016 regs.sprg1 = env->spr[SPR_SPRG1];
1017 regs.sprg2 = env->spr[SPR_SPRG2];
1018 regs.sprg3 = env->spr[SPR_SPRG3];
1019 regs.sprg4 = env->spr[SPR_SPRG4];
1020 regs.sprg5 = env->spr[SPR_SPRG5];
1021 regs.sprg6 = env->spr[SPR_SPRG6];
1022 regs.sprg7 = env->spr[SPR_SPRG7];
1023
1024 regs.pid = env->spr[SPR_BOOKE_PID];
1025
1026 for (i = 0; i < 32; i++)
1027 regs.gpr[i] = env->gpr[i];
1028
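    /* Pack the eight 4-bit condition register fields into the single
     * 32-bit image KVM expects, with CR0 in the most significant nibble. */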
1029 regs.cr = 0;
1030 for (i = 0; i < 8; i++) {
1031 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1032 }
1033
1034 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1035 if (ret < 0)
1036 return ret;
1037
1038 kvm_put_fp(cs);
1039
1040 if (env->tlb_dirty) {
1041 kvm_sw_tlb_put(cpu);
1042 env->tlb_dirty = false;
1043 }
1044
1045 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1046 ret = kvmppc_put_books_sregs(cpu);
1047 if (ret < 0) {
1048 return ret;
1049 }
1050 }
1051
1052 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1053 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1054 }
1055
1056 if (cap_one_reg) {
1057 int i;
1058
1059 /* We deliberately ignore errors here; for kernels which have
1060 * the ONE_REG calls but don't support the specific
1061 * registers, there's a reasonable chance things will still
1062 * work, at least until we try to migrate. */
1063 for (i = 0; i < 1024; i++) {
1064 uint64_t id = env->spr_cb[i].one_reg_id;
1065
1066 if (id != 0) {
1067 kvm_put_one_spr(cs, id, i);
1068 }
1069 }
1070
1071 #ifdef TARGET_PPC64
1072 if (msr_ts) {
1073 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1074 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1075 }
1076 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1077 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1078 }
1079 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1080 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1081 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1082 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1083 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1084 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1085 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1086 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1087 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1088 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1089 }
1090
1091 if (cap_papr) {
1092 if (kvm_put_vpa(cs) < 0) {
1093 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1094 }
1095 }
1096
1097 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1098 #endif /* TARGET_PPC64 */
1099 }
1100
1101 return ret;
1102 }
1103
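/* kvm_sync_excp(): recompute a BookE exception vector as IVPR + IVORn.
 * Illustrative example (hypothetical values): IVPR = 0xfff00000 with
 * IVOR2 = 0x300 yields a DSI vector of 0xfff00300. */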
1104 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1105 {
1106 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1107 }
1108
1109 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1110 {
1111 CPUPPCState *env = &cpu->env;
1112 struct kvm_sregs sregs;
1113 int ret;
1114
1115 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1116 if (ret < 0) {
1117 return ret;
1118 }
1119
1120 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1121 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1122 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1123 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1124 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1125 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1126 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1127 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1128 env->spr[SPR_DECR] = sregs.u.e.dec;
1129 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1130 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1131 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1132 }
1133
1134 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1135 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1136 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1137 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1138 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1139 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1140 }
1141
1142 if (sregs.u.e.features & KVM_SREGS_E_64) {
1143 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1144 }
1145
1146 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1147 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1148 }
1149
1150 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1151 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1152 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1153 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1154 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1155 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1156 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1157 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1158 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1159 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1160 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1161 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1162 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1163 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1164 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1165 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1166 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1167 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1168 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1169 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1170 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1171 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1172 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1173 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1174 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1175 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1176 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1177 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1178 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1179 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1180 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1181 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1182 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1183
1184 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1185 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1186 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1187 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1188 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1189 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1190 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1191 }
1192
1193 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1194 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1195 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1196 }
1197
1198 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1199 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1200 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1201 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1202 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1203 }
1204 }
1205
1206 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1207 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1208 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1209 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1210 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1211 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1212 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1213 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1214 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1215 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1216 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1217 }
1218
1219 if (sregs.u.e.features & KVM_SREGS_EXP) {
1220 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1221 }
1222
1223 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1224 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1225 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1226 }
1227
1228 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1229 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1230 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1231 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1232
1233 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1234 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1235 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1236 }
1237 }
1238
1239 return 0;
1240 }
1241
1242 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1243 {
1244 CPUPPCState *env = &cpu->env;
1245 struct kvm_sregs sregs;
1246 int ret;
1247 int i;
1248
1249 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1250 if (ret < 0) {
1251 return ret;
1252 }
1253
1254 if (!cpu->vhyp) {
1255 ppc_store_sdr1(env, sregs.u.s.sdr1);
1256 }
1257
1258 /* Sync SLB */
1259 #ifdef TARGET_PPC64
1260 /*
1261 * The packed SLB array we get from KVM_GET_SREGS only contains
1262 * information about valid entries. So we flush our internal copy
1263 * to get rid of stale ones, then put all valid SLB entries back
1264 * in.
1265 */
1266 memset(env->slb, 0, sizeof(env->slb));
1267 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1268 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1269 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1270 /*
1271 * Only restore valid entries
1272 */
1273 if (rb & SLB_ESID_V) {
1274 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1275 }
1276 }
1277 #endif
1278
1279 /* Sync SRs */
1280 for (i = 0; i < 16; i++) {
1281 env->sr[i] = sregs.u.s.ppc32.sr[i];
1282 }
1283
1284 /* Sync BATs */
1285 for (i = 0; i < 8; i++) {
1286 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1287 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1288 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1289 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1290 }
1291
1292 return 0;
1293 }
1294
1295 int kvm_arch_get_registers(CPUState *cs)
1296 {
1297 PowerPCCPU *cpu = POWERPC_CPU(cs);
1298 CPUPPCState *env = &cpu->env;
1299 struct kvm_regs regs;
1300 uint32_t cr;
1301 int i, ret;
1302
1303 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1304 if (ret < 0)
1305 return ret;
1306
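    /* Unpack the 32-bit CR image into the eight 4-bit crf[] fields;
     * the loop peels CR7 off the least significant nibble first. */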
1307 cr = regs.cr;
1308 for (i = 7; i >= 0; i--) {
1309 env->crf[i] = cr & 15;
1310 cr >>= 4;
1311 }
1312
1313 env->ctr = regs.ctr;
1314 env->lr = regs.lr;
1315 cpu_write_xer(env, regs.xer);
1316 env->msr = regs.msr;
1317 env->nip = regs.pc;
1318
1319 env->spr[SPR_SRR0] = regs.srr0;
1320 env->spr[SPR_SRR1] = regs.srr1;
1321
1322 env->spr[SPR_SPRG0] = regs.sprg0;
1323 env->spr[SPR_SPRG1] = regs.sprg1;
1324 env->spr[SPR_SPRG2] = regs.sprg2;
1325 env->spr[SPR_SPRG3] = regs.sprg3;
1326 env->spr[SPR_SPRG4] = regs.sprg4;
1327 env->spr[SPR_SPRG5] = regs.sprg5;
1328 env->spr[SPR_SPRG6] = regs.sprg6;
1329 env->spr[SPR_SPRG7] = regs.sprg7;
1330
1331 env->spr[SPR_BOOKE_PID] = regs.pid;
1332
1333 for (i = 0; i < 32; i++)
1334 env->gpr[i] = regs.gpr[i];
1335
1336 kvm_get_fp(cs);
1337
1338 if (cap_booke_sregs) {
1339 ret = kvmppc_get_booke_sregs(cpu);
1340 if (ret < 0) {
1341 return ret;
1342 }
1343 }
1344
1345 if (cap_segstate) {
1346 ret = kvmppc_get_books_sregs(cpu);
1347 if (ret < 0) {
1348 return ret;
1349 }
1350 }
1351
1352 if (cap_hior) {
1353 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1354 }
1355
1356 if (cap_one_reg) {
1357 int i;
1358
1359 /* We deliberately ignore errors here; for kernels which have
1360 * the ONE_REG calls but don't support the specific
1361 * registers, there's a reasonable chance things will still
1362 * work, at least until we try to migrate. */
1363 for (i = 0; i < 1024; i++) {
1364 uint64_t id = env->spr_cb[i].one_reg_id;
1365
1366 if (id != 0) {
1367 kvm_get_one_spr(cs, id, i);
1368 }
1369 }
1370
1371 #ifdef TARGET_PPC64
1372 if (msr_ts) {
1373 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1374 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1375 }
1376 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1377 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1378 }
1379 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1380 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1381 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1382 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1383 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1384 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1385 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1386 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1387 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1388 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1389 }
1390
1391 if (cap_papr) {
1392 if (kvm_get_vpa(cs) < 0) {
1393 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1394 }
1395 }
1396
1397 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1398 #endif
1399 }
1400
1401 return 0;
1402 }
1403
1404 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1405 {
1406 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1407
1408 if (irq != PPC_INTERRUPT_EXT) {
1409 return 0;
1410 }
1411
1412 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1413 return 0;
1414 }
1415
1416 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1417
1418 return 0;
1419 }
1420
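/* Pick the core-family-specific external interrupt input pin tested below
 * when deciding whether to inject an interrupt through the legacy
 * (non-level) KVM interface. */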
1421 #if defined(TARGET_PPCEMB)
1422 #define PPC_INPUT_INT PPC40x_INPUT_INT
1423 #elif defined(TARGET_PPC64)
1424 #define PPC_INPUT_INT PPC970_INPUT_INT
1425 #else
1426 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1427 #endif
1428
1429 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1430 {
1431 PowerPCCPU *cpu = POWERPC_CPU(cs);
1432 CPUPPCState *env = &cpu->env;
1433 int r;
1434 unsigned irq;
1435
1436 qemu_mutex_lock_iothread();
1437
1438 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1439 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1440 if (!cap_interrupt_level &&
1441 run->ready_for_interrupt_injection &&
1442 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1443 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1444 {
1445 /* For now KVM disregards the 'irq' argument. However, in the
1446 * future KVM could cache it in-kernel to avoid a heavyweight exit
1447 * when reading the UIC.
1448 */
1449 irq = KVM_INTERRUPT_SET;
1450
1451 DPRINTF("injected interrupt %d\n", irq);
1452 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1453 if (r < 0) {
1454 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1455 }
1456
1457 /* Always wake up soon in case the interrupt was level based */
1458 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1459 (NANOSECONDS_PER_SECOND / 50));
1460 }
1461
1462 /* We don't know if there are more interrupts pending after this. However,
1463 * the guest will return to userspace in the course of handling this one
1464 * anyway, so we will get a chance to deliver the rest. */
1465
1466 qemu_mutex_unlock_iothread();
1467 }
1468
1469 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1470 {
1471 return MEMTXATTRS_UNSPECIFIED;
1472 }
1473
1474 int kvm_arch_process_async_events(CPUState *cs)
1475 {
1476 return cs->halted;
1477 }
1478
1479 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1480 {
1481 CPUState *cs = CPU(cpu);
1482 CPUPPCState *env = &cpu->env;
1483
1484 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1485 cs->halted = 1;
1486 cs->exception_index = EXCP_HLT;
1487 }
1488
1489 return 0;
1490 }
1491
1492 /* map dcr access to existing qemu dcr emulation */
1493 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1494 {
1495 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1496 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1497
1498 return 0;
1499 }
1500
1501 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1502 {
1503 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1504 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1505
1506 return 0;
1507 }
1508
1509 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1510 {
1511 /* Mixed endian case is not handled */
1512 uint32_t sc = debug_inst_opcode;
1513
1514 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1515 sizeof(sc), 0) ||
1516 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1517 return -EINVAL;
1518 }
1519
1520 return 0;
1521 }
1522
1523 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1524 {
1525 uint32_t sc;
1526
1527 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1528 sc != debug_inst_opcode ||
1529 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1530 sizeof(sc), 1)) {
1531 return -EINVAL;
1532 }
1533
1534 return 0;
1535 }
1536
1537 static int find_hw_breakpoint(target_ulong addr, int type)
1538 {
1539 int n;
1540
1541 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1542 <= ARRAY_SIZE(hw_debug_points));
1543
1544 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1545 if (hw_debug_points[n].addr == addr &&
1546 hw_debug_points[n].type == type) {
1547 return n;
1548 }
1549 }
1550
1551 return -1;
1552 }
1553
1554 static int find_hw_watchpoint(target_ulong addr, int *flag)
1555 {
1556 int n;
1557
1558 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1559 if (n >= 0) {
1560 *flag = BP_MEM_ACCESS;
1561 return n;
1562 }
1563
1564 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1565 if (n >= 0) {
1566 *flag = BP_MEM_WRITE;
1567 return n;
1568 }
1569
1570 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1571 if (n >= 0) {
1572 *flag = BP_MEM_READ;
1573 return n;
1574 }
1575
1576 return -1;
1577 }
1578
1579 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1580 target_ulong len, int type)
1581 {
1582 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1583 return -ENOBUFS;
1584 }
1585
1586 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1587 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1588
1589 switch (type) {
1590 case GDB_BREAKPOINT_HW:
1591 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1592 return -ENOBUFS;
1593 }
1594
1595 if (find_hw_breakpoint(addr, type) >= 0) {
1596 return -EEXIST;
1597 }
1598
1599 nb_hw_breakpoint++;
1600 break;
1601
1602 case GDB_WATCHPOINT_WRITE:
1603 case GDB_WATCHPOINT_READ:
1604 case GDB_WATCHPOINT_ACCESS:
1605 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1606 return -ENOBUFS;
1607 }
1608
1609 if (find_hw_breakpoint(addr, type) >= 0) {
1610 return -EEXIST;
1611 }
1612
1613 nb_hw_watchpoint++;
1614 break;
1615
1616 default:
1617 return -ENOSYS;
1618 }
1619
1620 return 0;
1621 }
1622
1623 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1624 target_ulong len, int type)
1625 {
1626 int n;
1627
1628 n = find_hw_breakpoint(addr, type);
1629 if (n < 0) {
1630 return -ENOENT;
1631 }
1632
1633 switch (type) {
1634 case GDB_BREAKPOINT_HW:
1635 nb_hw_breakpoint--;
1636 break;
1637
1638 case GDB_WATCHPOINT_WRITE:
1639 case GDB_WATCHPOINT_READ:
1640 case GDB_WATCHPOINT_ACCESS:
1641 nb_hw_watchpoint--;
1642 break;
1643
1644 default:
1645 return -ENOSYS;
1646 }
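    /* Compact the array: the entry that was at the old end is moved into
     * the slot freed by the removed breakpoint/watchpoint. */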
1647 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1648
1649 return 0;
1650 }
1651
1652 void kvm_arch_remove_all_hw_breakpoints(void)
1653 {
1654 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1655 }
1656
1657 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1658 {
1659 int n;
1660
1661 /* Software Breakpoint updates */
1662 if (kvm_sw_breakpoints_active(cs)) {
1663 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1664 }
1665
1666 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1667 <= ARRAY_SIZE(hw_debug_points));
1668 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1669
1670 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1671 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1672 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1673 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1674 switch (hw_debug_points[n].type) {
1675 case GDB_BREAKPOINT_HW:
1676 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1677 break;
1678 case GDB_WATCHPOINT_WRITE:
1679 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1680 break;
1681 case GDB_WATCHPOINT_READ:
1682 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1683 break;
1684 case GDB_WATCHPOINT_ACCESS:
1685 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1686 KVMPPC_DEBUG_WATCH_READ;
1687 break;
1688 default:
1689 cpu_abort(cs, "Unsupported breakpoint type\n");
1690 }
1691 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1692 }
1693 }
1694 }
1695
1696 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1697 {
1698 CPUState *cs = CPU(cpu);
1699 CPUPPCState *env = &cpu->env;
1700 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1701 int handle = 0;
1702 int n;
1703 int flag = 0;
1704
1705 if (cs->singlestep_enabled) {
1706 handle = 1;
1707 } else if (arch_info->status) {
1708 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1709 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1710 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1711 if (n >= 0) {
1712 handle = 1;
1713 }
1714 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1715 KVMPPC_DEBUG_WATCH_WRITE)) {
1716 n = find_hw_watchpoint(arch_info->address, &flag);
1717 if (n >= 0) {
1718 handle = 1;
1719 cs->watchpoint_hit = &hw_watchpoint;
1720 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1721 hw_watchpoint.flags = flag;
1722 }
1723 }
1724 }
1725 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1726 handle = 1;
1727 } else {
1728 /* QEMU is not able to handle this debug exception, so inject
1729 * a program exception into the guest;
1730 * yes, a program exception, NOT a debug exception!
1731 * When QEMU is using the debug resources then the debug exception must
1732 * always be set. To achieve this we set MSR_DE and also set
1733 * MSRP_DEP so the guest cannot change MSR_DE.
1734 * When emulating debug resources for the guest we want the guest
1735 * to control MSR_DE (enable/disable the debug interrupt on demand).
1736 * Supporting both configurations is NOT possible.
1737 * So the result is that we cannot share debug resources
1738 * between QEMU and the guest on the BookE architecture.
1739 * In the current design QEMU gets priority over the guest;
1740 * this means that if QEMU is using the debug resources then the guest
1741 * cannot use them.
1742 * For software breakpoints QEMU uses a privileged instruction,
1743 * so there is no way we can be here because the guest raised a
1744 * debug exception; the only possibility is that the guest executed
1745 * a privileged / illegal instruction, and that is why we are
1746 * injecting a program interrupt.
1747 */
1748
1749 cpu_synchronize_state(cs);
1750 /* env->nip is PC, so increment this by 4 to use
1751 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1752 */
1753 env->nip += 4;
1754 cs->exception_index = POWERPC_EXCP_PROGRAM;
1755 env->error_code = POWERPC_EXCP_INVAL;
1756 ppc_cpu_do_interrupt(cs);
1757 }
1758
1759 return handle;
1760 }
1761
1762 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1763 {
1764 PowerPCCPU *cpu = POWERPC_CPU(cs);
1765 CPUPPCState *env = &cpu->env;
1766 int ret;
1767
1768 qemu_mutex_lock_iothread();
1769
1770 switch (run->exit_reason) {
1771 case KVM_EXIT_DCR:
1772 if (run->dcr.is_write) {
1773 DPRINTF("handle dcr write\n");
1774 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1775 } else {
1776 DPRINTF("handle dcr read\n");
1777 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1778 }
1779 break;
1780 case KVM_EXIT_HLT:
1781 DPRINTF("handle halt\n");
1782 ret = kvmppc_handle_halt(cpu);
1783 break;
1784 #if defined(TARGET_PPC64)
1785 case KVM_EXIT_PAPR_HCALL:
1786 DPRINTF("handle PAPR hypercall\n");
1787 run->papr_hcall.ret = spapr_hypercall(cpu,
1788 run->papr_hcall.nr,
1789 run->papr_hcall.args);
1790 ret = 0;
1791 break;
1792 #endif
1793 case KVM_EXIT_EPR:
1794 DPRINTF("handle epr\n");
1795 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1796 ret = 0;
1797 break;
1798 case KVM_EXIT_WATCHDOG:
1799 DPRINTF("handle watchdog expiry\n");
1800 watchdog_perform_action();
1801 ret = 0;
1802 break;
1803
1804 case KVM_EXIT_DEBUG:
1805 DPRINTF("handle debug exception\n");
1806 if (kvm_handle_debug(cpu, run)) {
1807 ret = EXCP_DEBUG;
1808 break;
1809 }
1810 /* re-enter, this exception was guest-internal */
1811 ret = 0;
1812 break;
1813
1814 default:
1815 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1816 ret = -1;
1817 break;
1818 }
1819
1820 qemu_mutex_unlock_iothread();
1821 return ret;
1822 }
1823
1824 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1825 {
1826 CPUState *cs = CPU(cpu);
1827 uint32_t bits = tsr_bits;
1828 struct kvm_one_reg reg = {
1829 .id = KVM_REG_PPC_OR_TSR,
1830 .addr = (uintptr_t) &bits,
1831 };
1832
1833 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1834 }
1835
1836 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1837 {
1838
1839 CPUState *cs = CPU(cpu);
1840 uint32_t bits = tsr_bits;
1841 struct kvm_one_reg reg = {
1842 .id = KVM_REG_PPC_CLEAR_TSR,
1843 .addr = (uintptr_t) &bits,
1844 };
1845
1846 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1847 }
1848
1849 int kvmppc_set_tcr(PowerPCCPU *cpu)
1850 {
1851 CPUState *cs = CPU(cpu);
1852 CPUPPCState *env = &cpu->env;
1853 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1854
1855 struct kvm_one_reg reg = {
1856 .id = KVM_REG_PPC_TCR,
1857 .addr = (uintptr_t) &tcr,
1858 };
1859
1860 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1861 }
1862
1863 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1864 {
1865 CPUState *cs = CPU(cpu);
1866 int ret;
1867
1868 if (!kvm_enabled()) {
1869 return -1;
1870 }
1871
1872 if (!cap_ppc_watchdog) {
1873 printf("warning: KVM does not support watchdog\n");
1874 return -1;
1875 }
1876
1877 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1878 if (ret < 0) {
1879 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1880 __func__, strerror(-ret));
1881 return ret;
1882 }
1883
1884 return ret;
1885 }
1886
1887 static int read_cpuinfo(const char *field, char *value, int len)
1888 {
1889 FILE *f;
1890 int ret = -1;
1891 int field_len = strlen(field);
1892 char line[512];
1893
1894 f = fopen("/proc/cpuinfo", "r");
1895 if (!f) {
1896 return -1;
1897 }
1898
1899 do {
1900 if (!fgets(line, sizeof(line), f)) {
1901 break;
1902 }
1903 if (!strncmp(line, field, field_len)) {
1904 pstrcpy(value, len, line);
1905 ret = 0;
1906 break;
1907 }
1908 } while (*line);
1909
1910 fclose(f);
1911
1912 return ret;
1913 }
1914
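/* Parse the timebase frequency from /proc/cpuinfo; on POWER hosts the line
 * typically looks like "timebase : 512000000". Falls back to
 * NANOSECONDS_PER_SECOND if the field cannot be found. */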
1915 uint32_t kvmppc_get_tbfreq(void)
1916 {
1917 char line[512];
1918 char *ns;
1919 uint32_t retval = NANOSECONDS_PER_SECOND;
1920
1921 if (read_cpuinfo("timebase", line, sizeof(line))) {
1922 return retval;
1923 }
1924
1925 if (!(ns = strchr(line, ':'))) {
1926 return retval;
1927 }
1928
1929 ns++;
1930
1931 return atoi(ns);
1932 }
1933
1934 bool kvmppc_get_host_serial(char **value)
1935 {
1936 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1937 NULL);
1938 }
1939
1940 bool kvmppc_get_host_model(char **value)
1941 {
1942 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1943 }
1944
1945 /* Try to find a device tree node for a CPU with clock-frequency property */
1946 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1947 {
1948 struct dirent *dirp;
1949 DIR *dp;
1950
1951 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1952 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1953 return -1;
1954 }
1955
1956 buf[0] = '\0';
1957 while ((dirp = readdir(dp)) != NULL) {
1958 FILE *f;
1959 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1960 dirp->d_name);
1961 f = fopen(buf, "r");
1962 if (f) {
1963 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1964 fclose(f);
1965 break;
1966 }
1967 buf[0] = '\0';
1968 }
1969 closedir(dp);
1970 if (buf[0] == '\0') {
1971 printf("Unknown host!\n");
1972 return -1;
1973 }
1974
1975 return 0;
1976 }
1977
1978 static uint64_t kvmppc_read_int_dt(const char *filename)
1979 {
1980 union {
1981 uint32_t v32;
1982 uint64_t v64;
1983 } u;
1984 FILE *f;
1985 int len;
1986
1987 f = fopen(filename, "rb");
1988 if (!f) {
1989 return -1;
1990 }
1991
1992 len = fread(&u, 1, sizeof(u), f);
1993 fclose(f);
1994 switch (len) {
1995 case 4:
1996 /* property is a 32-bit quantity */
1997 return be32_to_cpu(u.v32);
1998 case 8:
1999 return be64_to_cpu(u.v64);
2000 }
2001
2002 return 0;
2003 }
2004
2005 /* Read a CPU node property from the host device tree that's a single
2006 * integer (32-bit or 64-bit). Returns -1 if the CPU node or the
2007 * property can't be found or opened, and 0 if the property format
2008 * isn't understood. */
2009 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
2010 {
2011 char buf[PATH_MAX], *tmp;
2012 uint64_t val;
2013
2014 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
2015 return -1;
2016 }
2017
2018 tmp = g_strdup_printf("%s/%s", buf, propname);
2019 val = kvmppc_read_int_dt(tmp);
2020 g_free(tmp);
2021
2022 return val;
2023 }
2024
2025 uint64_t kvmppc_get_clockfreq(void)
2026 {
2027 return kvmppc_read_int_cpu_dt("clock-frequency");
2028 }
2029
2030 uint32_t kvmppc_get_vmx(void)
2031 {
2032 return kvmppc_read_int_cpu_dt("ibm,vmx");
2033 }
2034
2035 uint32_t kvmppc_get_dfp(void)
2036 {
2037 return kvmppc_read_int_cpu_dt("ibm,dfp");
2038 }
2039
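/* Fetch the paravirt hypercall info from KVM. Note the return convention:
 * 0 on success, 1 if the GET_PVINFO capability is absent or the ioctl
 * fails. */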
2040 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2041 {
2042 PowerPCCPU *cpu = ppc_env_get_cpu(env);
2043 CPUState *cs = CPU(cpu);
2044
2045 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2046 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2047 return 0;
2048 }
2049
2050 return 1;
2051 }
2052
2053 int kvmppc_get_hasidle(CPUPPCState *env)
2054 {
2055 struct kvm_ppc_pvinfo pvinfo;
2056
2057 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2058 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2059 return 1;
2060 }
2061
2062 return 0;
2063 }
2064
2065 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2066 {
2067 uint32_t *hc = (uint32_t*)buf;
2068 struct kvm_ppc_pvinfo pvinfo;
2069
2070 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2071 memcpy(buf, pvinfo.hcall, buf_len);
2072 return 0;
2073 }
2074
2075 /*
2076 * Fall back to a sequence that always fails hypercalls, regardless of endianness:
2077 *
2078 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2079 * li r3, -1
2080 * b .+8 (becomes nop in wrong endian)
2081 * bswap32(li r3, -1)
2082 */
2083
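/* In that order: 0x08000048 is tdi 0,r0,72; 0x3860ffff is li r3,-1;
 * 0x48000008 is b .+8; the last word is the byte-swapped li r3,-1. */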
2084 hc[0] = cpu_to_be32(0x08000048);
2085 hc[1] = cpu_to_be32(0x3860ffff);
2086 hc[2] = cpu_to_be32(0x48000008);
2087 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2088
2089 return 1;
2090 }
2091
2092 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2093 {
2094 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2095 }
2096
2097 void kvmppc_enable_logical_ci_hcalls(void)
2098 {
2099 /*
2100 * FIXME: it would be nice to detect the cases where a device
2101 * in use requires the in-kernel implementation of these
2102 * hcalls but the kernel lacks it, and to produce a warning
2103 * in that situation.
2104 */
2105 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2106 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2107 }
2108
2109 void kvmppc_enable_set_mode_hcall(void)
2110 {
2111 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2112 }
2113
2114 void kvmppc_enable_clear_ref_mod_hcalls(void)
2115 {
2116 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2117 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2118 }
2119
2120 void kvmppc_set_papr(PowerPCCPU *cpu)
2121 {
2122 CPUState *cs = CPU(cpu);
2123 int ret;
2124
2125 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2126 if (ret) {
2127 error_report("This vCPU type or KVM version does not support PAPR");
2128 exit(1);
2129 }
2130
2131 /* Update the capability flag so we sync the right information
2132 * with kvm */
2133 cap_papr = 1;
2134 }
2135
2136 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2137 {
2138 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2139 }
2140
2141 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2142 {
2143 CPUState *cs = CPU(cpu);
2144 int ret;
2145
2146 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2147 if (ret && mpic_proxy) {
2148 error_report("This KVM version does not support EPR");
2149 exit(1);
2150 }
2151 }
2152
2153 int kvmppc_smt_threads(void)
2154 {
2155 return cap_ppc_smt ? cap_ppc_smt : 1;
2156 }
2157
2158 #ifdef TARGET_PPC64
2159 off_t kvmppc_alloc_rma(void **rma)
2160 {
2161 off_t size;
2162 int fd;
2163 struct kvm_allocate_rma ret;
2164
2165 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2166 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2167 * not necessary on this hardware
2168 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2169 *
2170 * FIXME: We should allow the user to force contiguous RMA
2171 * allocation in the cap_ppc_rma==1 case.
2172 */
2173 if (cap_ppc_rma < 2) {
2174 return 0;
2175 }
2176
2177 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2178 if (fd < 0) {
2179 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2180 strerror(errno));
2181 return -1;
2182 }
2183
2184 size = MIN(ret.rma_size, 256ul << 20);
2185
2186 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2187 if (*rma == MAP_FAILED) {
2188 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2189 return -1;
2190 }
2191
2192 return size;
2193 }
2194
2195 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2196 {
2197 struct kvm_ppc_smmu_info info;
2198 long rampagesize, best_page_shift;
2199 int i;
2200
2201 if (cap_ppc_rma >= 2) {
2202 return current_size;
2203 }
2204
2205 /* Find the largest hardware supported page size that's less than
2206 * or equal to the (logical) backing page size of guest RAM */
2207 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2208 rampagesize = getrampagesize();
2209 best_page_shift = 0;
2210
2211 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2212 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2213
2214 if (!sps->page_shift) {
2215 continue;
2216 }
2217
2218 if ((sps->page_shift > best_page_shift)
2219 && ((1UL << sps->page_shift) <= rampagesize)) {
2220 best_page_shift = sps->page_shift;
2221 }
2222 }
2223
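/* The shift by 7 comes from the HPTE group size: a group is 128 bytes
 * (8 slots of HASH_PTE_SIZE_64), so a 2^hash_shift byte hash table has
 * 2^(hash_shift - 7) groups, and the RMA is capped at one backing page
 * per group. */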
2224 return MIN(current_size,
2225 1ULL << (best_page_shift + hash_shift - 7));
2226 }
2227 #endif
2228
2229 bool kvmppc_spapr_use_multitce(void)
2230 {
2231 return cap_spapr_multitce;
2232 }
2233
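/* Create an in-kernel TCE table of window_size / SPAPR_TCE_PAGE_SIZE
 * 64-bit entries and mmap() it into QEMU. On success the table is
 * returned and the backing fd is stored in *pfd; on failure (or when the
 * required capabilities are missing) NULL is returned with *pfd = -1 so
 * the caller can fall back to a userspace-managed table. */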
2234 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2235 bool need_vfio)
2236 {
2237 struct kvm_create_spapr_tce args = {
2238 .liobn = liobn,
2239 .window_size = window_size,
2240 };
2241 long len;
2242 int fd;
2243 void *table;
2244
2245 /* Must set fd to -1 so we don't try to munmap when called for
2246 * destroying the table, which the upper layers -will- do
2247 */
2248 *pfd = -1;
2249 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2250 return NULL;
2251 }
2252
2253 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2254 if (fd < 0) {
2255 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2256 liobn);
2257 return NULL;
2258 }
2259
2260 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2261 /* FIXME: round this up to page size */
2262
2263 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2264 if (table == MAP_FAILED) {
2265 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2266 liobn);
2267 close(fd);
2268 return NULL;
2269 }
2270
2271 *pfd = fd;
2272 return table;
2273 }
2274
2275 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2276 {
2277 long len;
2278
2279 if (fd < 0) {
2280 return -1;
2281 }
2282
2283 len = nb_table * sizeof(uint64_t);
2284 if ((munmap(table, len) < 0) ||
2285 (close(fd) < 0)) {
2286 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2287 strerror(errno));
2288 /* Leak the table */
2289 }
2290
2291 return 0;
2292 }
2293
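/* Return convention: negative on error, 0 when QEMU must allocate the
 * hash table itself (full emulation or PR KVM), otherwise the log2 size
 * (shift) of the hash table the kernel has allocated. */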
2294 int kvmppc_reset_htab(int shift_hint)
2295 {
2296 uint32_t shift = shift_hint;
2297
2298 if (!kvm_enabled()) {
2299 /* Full emulation, tell caller to allocate htab itself */
2300 return 0;
2301 }
2302 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2303 int ret;
2304 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2305 if (ret == -ENOTTY) {
2306 /* At least some versions of PR KVM advertise the
2307 * capability, but don't implement the ioctl(). Oops.
2308 * Return 0 so that we allocate the htab in qemu, as is
2309 * correct for PR. */
2310 return 0;
2311 } else if (ret < 0) {
2312 return ret;
2313 }
2314 return shift;
2315 }
2316
2317 /* We have a kernel that predates the htab reset calls. For PR
2318 * KVM we need to allocate the htab ourselves; an HV KVM of this
2319 * era will already have allocated a fixed 16MB hash table. */
2320 if (kvmppc_is_pr(kvm_state)) {
2321 /* PR - tell caller to allocate htab */
2322 return 0;
2323 } else {
2324 /* HV - assume 16MB kernel allocated htab */
2325 return 24;
2326 }
2327 }
2328
2329 static inline uint32_t mfpvr(void)
2330 {
2331 uint32_t pvr;
2332
2333 asm ("mfpvr %0"
2334 : "=r"(pvr));
2335 return pvr;
2336 }
2337
2338 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2339 {
2340 if (on) {
2341 *word |= flags;
2342 } else {
2343 *word &= ~flags;
2344 }
2345 }
2346
2347 static void kvmppc_host_cpu_initfn(Object *obj)
2348 {
2349 assert(kvm_enabled());
2350 }
2351
2352 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2353 {
2354 DeviceClass *dc = DEVICE_CLASS(oc);
2355 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2356 uint32_t vmx = kvmppc_get_vmx();
2357 uint32_t dfp = kvmppc_get_dfp();
2358 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2359 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2360
2361 /* Now fix up the class with information we can query from the host */
2362 pcc->pvr = mfpvr();
2363
2364 if (vmx != -1) {
2365 /* Only override when we know what the host supports */
2366 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2367 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2368 }
2369 if (dfp != -1) {
2370 /* Only override when we know what the host supports */
2371 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2372 }
2373
2374 if (dcache_size != -1) {
2375 pcc->l1_dcache_size = dcache_size;
2376 }
2377
2378 if (icache_size != -1) {
2379 pcc->l1_icache_size = icache_size;
2380 }
2381
2382 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2383 dc->cannot_destroy_with_object_finalize_yet = true;
2384 }
2385
2386 bool kvmppc_has_cap_epr(void)
2387 {
2388 return cap_epr;
2389 }
2390
2391 bool kvmppc_has_cap_htab_fd(void)
2392 {
2393 return cap_htab_fd;
2394 }
2395
2396 bool kvmppc_has_cap_fixup_hcalls(void)
2397 {
2398 return cap_fixup_hcalls;
2399 }
2400
2401 bool kvmppc_has_cap_htm(void)
2402 {
2403 return cap_htm;
2404 }
2405
2406 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2407 {
2408 ObjectClass *oc = OBJECT_CLASS(pcc);
2409
2410 while (oc && !object_class_is_abstract(oc)) {
2411 oc = object_class_get_parent(oc);
2412 }
2413 assert(oc);
2414
2415 return POWERPC_CPU_CLASS(oc);
2416 }
2417
2418 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2419 {
2420 uint32_t host_pvr = mfpvr();
2421 PowerPCCPUClass *pvr_pcc;
2422
2423 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2424 if (pvr_pcc == NULL) {
2425 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2426 }
2427
2428 return pvr_pcc;
2429 }
2430
2431 static int kvm_ppc_register_host_cpu_type(void)
2432 {
2433 TypeInfo type_info = {
2434 .name = TYPE_HOST_POWERPC_CPU,
2435 .instance_init = kvmppc_host_cpu_initfn,
2436 .class_init = kvmppc_host_cpu_class_init,
2437 };
2438 PowerPCCPUClass *pvr_pcc;
2439 DeviceClass *dc;
2440 int i;
2441
2442 pvr_pcc = kvm_ppc_get_host_cpu_class();
2443 if (pvr_pcc == NULL) {
2444 return -1;
2445 }
2446 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2447 type_register(&type_info);
2448
2449 #if defined(TARGET_PPC64)
2450 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2451 type_info.parent = TYPE_SPAPR_CPU_CORE;
2452 type_info.instance_size = sizeof(sPAPRCPUCore);
2453 type_info.instance_init = NULL;
2454 type_info.class_init = spapr_cpu_core_class_init;
2455 type_info.class_data = (void *) "host";
2456 type_register(&type_info);
2457 g_free((void *)type_info.name);
2458 #endif
2459
2460 /*
2461 * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2462 * we want "POWER8" to be a "family" alias that points to the current
2463 * host CPU type, too)
2464 */
2465 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2466 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2467 if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2468 ObjectClass *oc = OBJECT_CLASS(pvr_pcc);
2469 char *suffix;
2470
2471 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2472 suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
2473 if (suffix) {
2474 *suffix = 0;
2475 }
2476 ppc_cpu_aliases[i].oc = oc;
2477 break;
2478 }
2479 }
2480
2481 return 0;
2482 }
2483
2484 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2485 {
2486 struct kvm_rtas_token_args args = {
2487 .token = token,
2488 };
2489
2490 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2491 return -ENOENT;
2492 }
2493
2494 strncpy(args.name, function, sizeof(args.name));
2495
2496 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2497 }
2498
2499 int kvmppc_get_htab_fd(bool write)
2500 {
2501 struct kvm_get_htab_fd s = {
2502 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2503 .start_index = 0,
2504 };
2505
2506 if (!cap_htab_fd) {
2507 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2508 return -1;
2509 }
2510
2511 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2512 }
2513
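/* Stream format used for migration below, one record per chunk read from
 * the HTAB fd: a be32 index, a be16 n_valid, a be16 n_invalid, followed
 * by n_valid HPTEs of HASH_PTE_SIZE_64 bytes each; kvmppc_load_htab_chunk()
 * consumes the same layout on the destination. A rough caller sketch
 * (not the actual migration loop, error handling omitted) might look like:
 *
 *     int fd = kvmppc_get_htab_fd(false);
 *     int done;
 *     do {
 *         done = kvmppc_save_htab(f, fd, bufsize, max_ns);
 *     } while (done == 0);            (1 means the fd reached EOF)
 *     close(fd);
 */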
2514 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2515 {
2516 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2517 uint8_t buf[bufsize];
2518 ssize_t rc;
2519
2520 do {
2521 rc = read(fd, buf, bufsize);
2522 if (rc < 0) {
2523 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2524 strerror(errno));
2525 return rc;
2526 } else if (rc) {
2527 uint8_t *buffer = buf;
2528 ssize_t n = rc;
2529 while (n) {
2530 struct kvm_get_htab_header *head =
2531 (struct kvm_get_htab_header *) buffer;
2532 size_t chunksize = sizeof(*head) +
2533 HASH_PTE_SIZE_64 * head->n_valid;
2534
2535 qemu_put_be32(f, head->index);
2536 qemu_put_be16(f, head->n_valid);
2537 qemu_put_be16(f, head->n_invalid);
2538 qemu_put_buffer(f, (void *)(head + 1),
2539 HASH_PTE_SIZE_64 * head->n_valid);
2540
2541 buffer += chunksize;
2542 n -= chunksize;
2543 }
2544 }
2545 } while ((rc != 0)
2546 && ((max_ns < 0)
2547 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2548
2549 return (rc == 0) ? 1 : 0;
2550 }
2551
2552 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2553 uint16_t n_valid, uint16_t n_invalid)
2554 {
2555 struct kvm_get_htab_header *buf;
2556 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2557 ssize_t rc;
2558
2559 buf = alloca(chunksize);
2560 buf->index = index;
2561 buf->n_valid = n_valid;
2562 buf->n_invalid = n_invalid;
2563
2564 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2565
2566 rc = write(fd, buf, chunksize);
2567 if (rc < 0) {
2568 fprintf(stderr, "Error writing KVM hash table: %s\n",
2569 strerror(errno));
2570 return rc;
2571 }
2572 if (rc != chunksize) {
2573 /* We should never get a short write on a single chunk */
2574 fprintf(stderr, "Short write, restoring KVM hash table\n");
2575 return -1;
2576 }
2577 return 0;
2578 }
2579
2580 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2581 {
2582 return true;
2583 }
2584
2585 void kvm_arch_init_irq_routing(KVMState *s)
2586 {
2587 }
2588
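/* Read n HPTEs starting at index ptex through the HTAB fd. Runs reported
 * as invalid by the chunk headers are zero-filled in the destination, so
 * the caller always receives n fully initialised entries. */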
2589 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2590 {
2591 struct kvm_get_htab_fd ghf = {
2592 .flags = 0,
2593 .start_index = ptex,
2594 };
2595 int fd, rc;
2596 int i;
2597
2598 fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2599 if (fd < 0) {
2600 hw_error("kvmppc_read_hptes: Unable to open HPT fd");
2601 }
2602
2603 i = 0;
2604 while (i < n) {
2605 struct kvm_get_htab_header *hdr;
2606 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2607 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2608
2609 rc = read(fd, buf, sizeof(buf));
2610 if (rc < 0) {
2611 hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2612 }
2613
2614 hdr = (struct kvm_get_htab_header *)buf;
2615 while ((i < n) && ((char *)hdr < (buf + rc))) {
2616 int invalid = hdr->n_invalid;
2617
2618 if (hdr->index != (ptex + i)) {
2619 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2620 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2621 }
2622
2623 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2624 i += hdr->n_valid;
2625
2626 if ((n - i) < invalid) {
2627 invalid = n - i;
2628 }
2629 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2630 i += hdr->n_invalid;
2631
2632 hdr = (struct kvm_get_htab_header *)
2633 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2634 }
2635 }
2636
2637 close(fd);
2638 }
2639
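/* Update a single HPTE through the HTAB fd: one header with n_valid = 1
 * and n_invalid = 0, followed by the two PTE doublewords. HPTEs are kept
 * big-endian in the hash table, hence the cpu_to_be64() on both words. */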
2640 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2641 {
2642 int fd, rc;
2643 struct kvm_get_htab_fd ghf;
2644 struct {
2645 struct kvm_get_htab_header hdr;
2646 uint64_t pte0;
2647 uint64_t pte1;
2648 } buf;
2649
2650 ghf.flags = 0;
2651 ghf.start_index = 0; /* Ignored */
2652 fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2653 if (fd < 0) {
2654 hw_error("kvmppc_write_hpte: Unable to open HPT fd");
2655 }
2656
2657 buf.hdr.n_valid = 1;
2658 buf.hdr.n_invalid = 0;
2659 buf.hdr.index = ptex;
2660 buf.pte0 = cpu_to_be64(pte0);
2661 buf.pte1 = cpu_to_be64(pte1);
2662
2663 rc = write(fd, &buf, sizeof(buf));
2664 if (rc != sizeof(buf)) {
2665 hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2666 }
2667 close(fd);
2668 }
2669
2670 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2671 uint64_t address, uint32_t data, PCIDevice *dev)
2672 {
2673 return 0;
2674 }
2675
2676 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2677 int vector, PCIDevice *dev)
2678 {
2679 return 0;
2680 }
2681
2682 int kvm_arch_release_virq_post(int virq)
2683 {
2684 return 0;
2685 }
2686
2687 int kvm_arch_msi_data_to_gsi(uint32_t data)
2688 {
2689 return data & 0xffff;
2690 }
2691
2692 int kvmppc_enable_hwrng(void)
2693 {
2694 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2695 return -1;
2696 }
2697
2698 return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2699 }