2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
19 #include <sys/ioctl.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/error-report.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/kvm.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/ppc.h"
40 #include "sysemu/watchdog.h"
42 #include "exec/gdbstub.h"
43 #include "exec/memattrs.h"
44 #include "sysemu/hostmem.h"
/* Uncomment to get verbose KVM debug output on stderr. */
/* #define DEBUG_KVM */

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
/* Compiles to nothing, but still type-checks the arguments. */
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
56 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
58 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
62 static int cap_interrupt_unset
= false;
63 static int cap_interrupt_level
= false;
64 static int cap_segstate
;
65 static int cap_booke_sregs
;
66 static int cap_ppc_smt
;
67 static int cap_ppc_rma
;
68 static int cap_spapr_tce
;
69 static int cap_spapr_multitce
;
70 static int cap_spapr_vfio
;
72 static int cap_one_reg
;
74 static int cap_ppc_watchdog
;
76 static int cap_htab_fd
;
77 static int cap_fixup_hcalls
;
79 static uint32_t debug_inst_opcode
;
81 /* XXX We have a race condition where we actually have a level triggered
82 * interrupt, but the infrastructure can't expose that yet, so the guest
83 * takes but ignores it, goes to sleep and never gets notified that there's
84 * still an interrupt pending.
86 * As a quick workaround, let's just wake up again 20 ms after we injected
87 * an interrupt. That way we can assure that we're always reinjecting
88 * interrupts in case the guest swallowed them.
90 static QEMUTimer
*idle_timer
;
92 static void kvm_kick_cpu(void *opaque
)
94 PowerPCCPU
*cpu
= opaque
;
96 qemu_cpu_kick(CPU(cpu
));
99 static int kvm_ppc_register_host_cpu_type(void);
101 int kvm_arch_init(MachineState
*ms
, KVMState
*s
)
103 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
104 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
105 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
106 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
107 cap_ppc_smt
= kvm_check_extension(s
, KVM_CAP_PPC_SMT
);
108 cap_ppc_rma
= kvm_check_extension(s
, KVM_CAP_PPC_RMA
);
109 cap_spapr_tce
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE
);
110 cap_spapr_multitce
= kvm_check_extension(s
, KVM_CAP_SPAPR_MULTITCE
);
111 cap_spapr_vfio
= false;
112 cap_one_reg
= kvm_check_extension(s
, KVM_CAP_ONE_REG
);
113 cap_hior
= kvm_check_extension(s
, KVM_CAP_PPC_HIOR
);
114 cap_epr
= kvm_check_extension(s
, KVM_CAP_PPC_EPR
);
115 cap_ppc_watchdog
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_WATCHDOG
);
116 /* Note: we don't set cap_papr here, because this capability is
117 * only activated after this by kvmppc_set_papr() */
118 cap_htab_fd
= kvm_check_extension(s
, KVM_CAP_PPC_HTAB_FD
);
119 cap_fixup_hcalls
= kvm_check_extension(s
, KVM_CAP_PPC_FIXUP_HCALL
);
121 if (!cap_interrupt_level
) {
122 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
123 "VM to stall at times!\n");
126 kvm_ppc_register_host_cpu_type();
131 static int kvm_arch_sync_sregs(PowerPCCPU
*cpu
)
133 CPUPPCState
*cenv
= &cpu
->env
;
134 CPUState
*cs
= CPU(cpu
);
135 struct kvm_sregs sregs
;
138 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
139 /* What we're really trying to say is "if we're on BookE, we use
140 the native PVR for now". This is the only sane way to check
141 it though, so we potentially confuse users that they can run
142 BookE guests on BookS. Let's hope nobody dares enough :) */
146 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
151 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
156 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
157 return kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
160 /* Set up a shared TLB array with KVM */
161 static int kvm_booke206_tlb_init(PowerPCCPU
*cpu
)
163 CPUPPCState
*env
= &cpu
->env
;
164 CPUState
*cs
= CPU(cpu
);
165 struct kvm_book3e_206_tlb_params params
= {};
166 struct kvm_config_tlb cfg
= {};
167 unsigned int entries
= 0;
170 if (!kvm_enabled() ||
171 !kvm_check_extension(cs
->kvm_state
, KVM_CAP_SW_TLB
)) {
175 assert(ARRAY_SIZE(params
.tlb_sizes
) == BOOKE206_MAX_TLBN
);
177 for (i
= 0; i
< BOOKE206_MAX_TLBN
; i
++) {
178 params
.tlb_sizes
[i
] = booke206_tlb_size(env
, i
);
179 params
.tlb_ways
[i
] = booke206_tlb_ways(env
, i
);
180 entries
+= params
.tlb_sizes
[i
];
183 assert(entries
== env
->nb_tlb
);
184 assert(sizeof(struct kvm_book3e_206_tlb_entry
) == sizeof(ppcmas_tlb_t
));
186 env
->tlb_dirty
= true;
188 cfg
.array
= (uintptr_t)env
->tlb
.tlbm
;
189 cfg
.array_len
= sizeof(ppcmas_tlb_t
) * entries
;
190 cfg
.params
= (uintptr_t)¶ms
;
191 cfg
.mmu_type
= KVM_MMU_FSL_BOOKE_NOHV
;
193 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_SW_TLB
, 0, (uintptr_t)&cfg
);
195 fprintf(stderr
, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
196 __func__
, strerror(-ret
));
200 env
->kvm_sw_tlb
= true;
205 #if defined(TARGET_PPC64)
206 static void kvm_get_fallback_smmu_info(PowerPCCPU
*cpu
,
207 struct kvm_ppc_smmu_info
*info
)
209 CPUPPCState
*env
= &cpu
->env
;
210 CPUState
*cs
= CPU(cpu
);
212 memset(info
, 0, sizeof(*info
));
214 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
215 * need to "guess" what the supported page sizes are.
217 * For that to work we make a few assumptions:
219 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
220 * KVM which only supports 4K and 16M pages, but supports them
221 * regardless of the backing store characteritics. We also don't
222 * support 1T segments.
224 * This is safe as if HV KVM ever supports that capability or PR
225 * KVM grows supports for more page/segment sizes, those versions
226 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
227 * will not hit this fallback
229 * - Else we are running HV KVM. This means we only support page
230 * sizes that fit in the backing store. Additionally we only
231 * advertize 64K pages if the processor is ARCH 2.06 and we assume
232 * P7 encodings for the SLB and hash table. Here too, we assume
233 * support for any newer processor will mean a kernel that
234 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
237 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
242 /* Standard 4k base page size segment */
243 info
->sps
[0].page_shift
= 12;
244 info
->sps
[0].slb_enc
= 0;
245 info
->sps
[0].enc
[0].page_shift
= 12;
246 info
->sps
[0].enc
[0].pte_enc
= 0;
248 /* Standard 16M large page size segment */
249 info
->sps
[1].page_shift
= 24;
250 info
->sps
[1].slb_enc
= SLB_VSID_L
;
251 info
->sps
[1].enc
[0].page_shift
= 24;
252 info
->sps
[1].enc
[0].pte_enc
= 0;
256 /* HV KVM has backing store size restrictions */
257 info
->flags
= KVM_PPC_PAGE_SIZES_REAL
;
259 if (env
->mmu_model
& POWERPC_MMU_1TSEG
) {
260 info
->flags
|= KVM_PPC_1T_SEGMENTS
;
263 if (env
->mmu_model
== POWERPC_MMU_2_06
||
264 env
->mmu_model
== POWERPC_MMU_2_07
) {
270 /* Standard 4k base page size segment */
271 info
->sps
[i
].page_shift
= 12;
272 info
->sps
[i
].slb_enc
= 0;
273 info
->sps
[i
].enc
[0].page_shift
= 12;
274 info
->sps
[i
].enc
[0].pte_enc
= 0;
277 /* 64K on MMU 2.06 and later */
278 if (env
->mmu_model
== POWERPC_MMU_2_06
||
279 env
->mmu_model
== POWERPC_MMU_2_07
) {
280 info
->sps
[i
].page_shift
= 16;
281 info
->sps
[i
].slb_enc
= 0x110;
282 info
->sps
[i
].enc
[0].page_shift
= 16;
283 info
->sps
[i
].enc
[0].pte_enc
= 1;
287 /* Standard 16M large page size segment */
288 info
->sps
[i
].page_shift
= 24;
289 info
->sps
[i
].slb_enc
= SLB_VSID_L
;
290 info
->sps
[i
].enc
[0].page_shift
= 24;
291 info
->sps
[i
].enc
[0].pte_enc
= 0;
295 static void kvm_get_smmu_info(PowerPCCPU
*cpu
, struct kvm_ppc_smmu_info
*info
)
297 CPUState
*cs
= CPU(cpu
);
300 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
301 ret
= kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
307 kvm_get_fallback_smmu_info(cpu
, info
);
/* Return the page size backing mem_path: the filesystem block size when
 * it lives on hugetlbfs, the normal host page size otherwise.  Exits on
 * statfs() failure. */
static long gethugepagesize(const char *mem_path)
{
    struct statfs fs;
    int ret;

    /* statfs() may be interrupted by a signal; retry on EINTR. */
    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}
336 static int find_max_supported_pagesize(Object
*obj
, void *opaque
)
339 long *hpsize_min
= opaque
;
341 if (object_dynamic_cast(obj
, TYPE_MEMORY_BACKEND
)) {
342 mem_path
= object_property_get_str(obj
, "mem-path", NULL
);
344 long hpsize
= gethugepagesize(mem_path
);
345 if (hpsize
< *hpsize_min
) {
346 *hpsize_min
= hpsize
;
349 *hpsize_min
= getpagesize();
356 static long getrampagesize(void)
358 long hpsize
= LONG_MAX
;
362 return gethugepagesize(mem_path
);
365 /* it's possible we have memory-backend objects with
366 * hugepage-backed RAM. these may get mapped into system
367 * address space via -numa parameters or memory hotplug
368 * hooks. we want to take these into account, but we
369 * also want to make sure these supported hugepage
370 * sizes are applicable across the entire range of memory
371 * we may boot from, so we take the min across all
372 * backends, and assume normal pages in cases where a
373 * backend isn't backed by hugepages.
375 memdev_root
= object_resolve_path("/objects", NULL
);
377 return getpagesize();
380 object_child_foreach(memdev_root
, find_max_supported_pagesize
, &hpsize
);
382 return (hpsize
== LONG_MAX
) ? getpagesize() : hpsize
;
385 static bool kvm_valid_page_size(uint32_t flags
, long rampgsize
, uint32_t shift
)
387 if (!(flags
& KVM_PPC_PAGE_SIZES_REAL
)) {
391 return (1ul << shift
) <= rampgsize
;
394 static void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
396 static struct kvm_ppc_smmu_info smmu_info
;
397 static bool has_smmu_info
;
398 CPUPPCState
*env
= &cpu
->env
;
402 /* We only handle page sizes for 64-bit server guests for now */
403 if (!(env
->mmu_model
& POWERPC_MMU_64
)) {
407 /* Collect MMU info from kernel if not already */
408 if (!has_smmu_info
) {
409 kvm_get_smmu_info(cpu
, &smmu_info
);
410 has_smmu_info
= true;
413 rampagesize
= getrampagesize();
415 /* Convert to QEMU form */
416 memset(&env
->sps
, 0, sizeof(env
->sps
));
418 /* If we have HV KVM, we need to forbid CI large pages if our
419 * host page size is smaller than 64K.
421 if (smmu_info
.flags
& KVM_PPC_PAGE_SIZES_REAL
) {
422 env
->ci_large_pages
= getpagesize() >= 0x10000;
426 * XXX This loop should be an entry wide AND of the capabilities that
427 * the selected CPU has with the capabilities that KVM supports.
429 for (ik
= iq
= 0; ik
< KVM_PPC_PAGE_SIZES_MAX_SZ
; ik
++) {
430 struct ppc_one_seg_page_size
*qsps
= &env
->sps
.sps
[iq
];
431 struct kvm_ppc_one_seg_page_size
*ksps
= &smmu_info
.sps
[ik
];
433 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
437 qsps
->page_shift
= ksps
->page_shift
;
438 qsps
->slb_enc
= ksps
->slb_enc
;
439 for (jk
= jq
= 0; jk
< KVM_PPC_PAGE_SIZES_MAX_SZ
; jk
++) {
440 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
441 ksps
->enc
[jk
].page_shift
)) {
444 qsps
->enc
[jq
].page_shift
= ksps
->enc
[jk
].page_shift
;
445 qsps
->enc
[jq
].pte_enc
= ksps
->enc
[jk
].pte_enc
;
446 if (++jq
>= PPC_PAGE_SIZES_MAX_SZ
) {
450 if (++iq
>= PPC_PAGE_SIZES_MAX_SZ
) {
454 env
->slb_nr
= smmu_info
.slb_size
;
455 if (!(smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
)) {
456 env
->mmu_model
&= ~POWERPC_MMU_1TSEG
;
459 #else /* defined (TARGET_PPC64) */
461 static inline void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
465 #endif /* !defined (TARGET_PPC64) */
467 unsigned long kvm_arch_vcpu_id(CPUState
*cpu
)
469 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu
));
472 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
473 * book3s supports only 1 watchpoint, so array size
474 * of 4 is sufficient for now.
476 #define MAX_HW_BKPTS 4
478 static struct HWBreakpoint
{
481 } hw_debug_points
[MAX_HW_BKPTS
];
483 static CPUWatchpoint hw_watchpoint
;
485 /* Default there is no breakpoint and watchpoint supported */
486 static int max_hw_breakpoint
;
487 static int max_hw_watchpoint
;
488 static int nb_hw_breakpoint
;
489 static int nb_hw_watchpoint
;
491 static void kvmppc_hw_debug_points_init(CPUPPCState
*cenv
)
493 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
494 max_hw_breakpoint
= 2;
495 max_hw_watchpoint
= 2;
498 if ((max_hw_breakpoint
+ max_hw_watchpoint
) > MAX_HW_BKPTS
) {
499 fprintf(stderr
, "Error initializing h/w breakpoints\n");
504 int kvm_arch_init_vcpu(CPUState
*cs
)
506 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
507 CPUPPCState
*cenv
= &cpu
->env
;
510 /* Gather server mmu info from KVM and update the CPU state */
511 kvm_fixup_page_sizes(cpu
);
513 /* Synchronize sregs with kvm */
514 ret
= kvm_arch_sync_sregs(cpu
);
516 if (ret
== -EINVAL
) {
517 error_report("Register sync failed... If you're using kvm-hv.ko,"
518 " only \"-cpu host\" is possible");
523 idle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
, kvm_kick_cpu
, cpu
);
525 /* Some targets support access to KVM's guest TLB. */
526 switch (cenv
->mmu_model
) {
527 case POWERPC_MMU_BOOKE206
:
528 ret
= kvm_booke206_tlb_init(cpu
);
534 kvm_get_one_reg(cs
, KVM_REG_PPC_DEBUG_INST
, &debug_inst_opcode
);
535 kvmppc_hw_debug_points_init(cenv
);
540 static void kvm_sw_tlb_put(PowerPCCPU
*cpu
)
542 CPUPPCState
*env
= &cpu
->env
;
543 CPUState
*cs
= CPU(cpu
);
544 struct kvm_dirty_tlb dirty_tlb
;
545 unsigned char *bitmap
;
548 if (!env
->kvm_sw_tlb
) {
552 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
553 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
555 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
556 dirty_tlb
.num_dirty
= env
->nb_tlb
;
558 ret
= kvm_vcpu_ioctl(cs
, KVM_DIRTY_TLB
, &dirty_tlb
);
560 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
561 __func__
, strerror(-ret
));
567 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
569 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
570 CPUPPCState
*env
= &cpu
->env
;
575 struct kvm_one_reg reg
= {
577 .addr
= (uintptr_t) &val
,
581 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
583 trace_kvm_failed_spr_get(spr
, strerror(errno
));
585 switch (id
& KVM_REG_SIZE_MASK
) {
586 case KVM_REG_SIZE_U32
:
587 env
->spr
[spr
] = val
.u32
;
590 case KVM_REG_SIZE_U64
:
591 env
->spr
[spr
] = val
.u64
;
595 /* Don't handle this size yet */
601 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
603 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
604 CPUPPCState
*env
= &cpu
->env
;
609 struct kvm_one_reg reg
= {
611 .addr
= (uintptr_t) &val
,
615 switch (id
& KVM_REG_SIZE_MASK
) {
616 case KVM_REG_SIZE_U32
:
617 val
.u32
= env
->spr
[spr
];
620 case KVM_REG_SIZE_U64
:
621 val
.u64
= env
->spr
[spr
];
625 /* Don't handle this size yet */
629 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
631 trace_kvm_failed_spr_set(spr
, strerror(errno
));
635 static int kvm_put_fp(CPUState
*cs
)
637 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
638 CPUPPCState
*env
= &cpu
->env
;
639 struct kvm_one_reg reg
;
643 if (env
->insns_flags
& PPC_FLOAT
) {
644 uint64_t fpscr
= env
->fpscr
;
645 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
647 reg
.id
= KVM_REG_PPC_FPSCR
;
648 reg
.addr
= (uintptr_t)&fpscr
;
649 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
651 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
655 for (i
= 0; i
< 32; i
++) {
658 #ifdef HOST_WORDS_BIGENDIAN
659 vsr
[0] = float64_val(env
->fpr
[i
]);
660 vsr
[1] = env
->vsr
[i
];
662 vsr
[0] = env
->vsr
[i
];
663 vsr
[1] = float64_val(env
->fpr
[i
]);
665 reg
.addr
= (uintptr_t) &vsr
;
666 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
668 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
670 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
677 if (env
->insns_flags
& PPC_ALTIVEC
) {
678 reg
.id
= KVM_REG_PPC_VSCR
;
679 reg
.addr
= (uintptr_t)&env
->vscr
;
680 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
682 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno
));
686 for (i
= 0; i
< 32; i
++) {
687 reg
.id
= KVM_REG_PPC_VR(i
);
688 reg
.addr
= (uintptr_t)&env
->avr
[i
];
689 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
691 DPRINTF("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
700 static int kvm_get_fp(CPUState
*cs
)
702 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
703 CPUPPCState
*env
= &cpu
->env
;
704 struct kvm_one_reg reg
;
708 if (env
->insns_flags
& PPC_FLOAT
) {
710 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
712 reg
.id
= KVM_REG_PPC_FPSCR
;
713 reg
.addr
= (uintptr_t)&fpscr
;
714 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
716 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
722 for (i
= 0; i
< 32; i
++) {
725 reg
.addr
= (uintptr_t) &vsr
;
726 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
728 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
730 DPRINTF("Unable to get %s%d from KVM: %s\n",
731 vsx
? "VSR" : "FPR", i
, strerror(errno
));
734 #ifdef HOST_WORDS_BIGENDIAN
735 env
->fpr
[i
] = vsr
[0];
737 env
->vsr
[i
] = vsr
[1];
740 env
->fpr
[i
] = vsr
[1];
742 env
->vsr
[i
] = vsr
[0];
749 if (env
->insns_flags
& PPC_ALTIVEC
) {
750 reg
.id
= KVM_REG_PPC_VSCR
;
751 reg
.addr
= (uintptr_t)&env
->vscr
;
752 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
754 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno
));
758 for (i
= 0; i
< 32; i
++) {
759 reg
.id
= KVM_REG_PPC_VR(i
);
760 reg
.addr
= (uintptr_t)&env
->avr
[i
];
761 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
763 DPRINTF("Unable to get VR%d from KVM: %s\n",
773 #if defined(TARGET_PPC64)
774 static int kvm_get_vpa(CPUState
*cs
)
776 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
777 CPUPPCState
*env
= &cpu
->env
;
778 struct kvm_one_reg reg
;
781 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
782 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
783 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
785 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno
));
789 assert((uintptr_t)&env
->slb_shadow_size
790 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
791 reg
.id
= KVM_REG_PPC_VPA_SLB
;
792 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
793 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
795 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
800 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
801 reg
.id
= KVM_REG_PPC_VPA_DTL
;
802 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
803 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
805 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
813 static int kvm_put_vpa(CPUState
*cs
)
815 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
816 CPUPPCState
*env
= &cpu
->env
;
817 struct kvm_one_reg reg
;
820 /* SLB shadow or DTL can't be registered unless a master VPA is
821 * registered. That means when restoring state, if a VPA *is*
822 * registered, we need to set that up first. If not, we need to
823 * deregister the others before deregistering the master VPA */
824 assert(env
->vpa_addr
|| !(env
->slb_shadow_addr
|| env
->dtl_addr
));
827 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
828 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
829 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
831 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
836 assert((uintptr_t)&env
->slb_shadow_size
837 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
838 reg
.id
= KVM_REG_PPC_VPA_SLB
;
839 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
840 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
842 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
846 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
847 reg
.id
= KVM_REG_PPC_VPA_DTL
;
848 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
849 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
851 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
856 if (!env
->vpa_addr
) {
857 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
858 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
859 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
861 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
868 #endif /* TARGET_PPC64 */
870 int kvmppc_put_books_sregs(PowerPCCPU
*cpu
)
872 CPUPPCState
*env
= &cpu
->env
;
873 struct kvm_sregs sregs
;
876 sregs
.pvr
= env
->spr
[SPR_PVR
];
878 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
882 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
883 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
884 if (env
->slb
[i
].esid
& SLB_ESID_V
) {
885 sregs
.u
.s
.ppc64
.slb
[i
].slbe
|= i
;
887 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
892 for (i
= 0; i
< 16; i
++) {
893 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
897 for (i
= 0; i
< 8; i
++) {
898 /* Beware. We have to swap upper and lower bits here */
899 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
901 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
905 return kvm_vcpu_ioctl(CPU(cpu
), KVM_SET_SREGS
, &sregs
);
908 int kvm_arch_put_registers(CPUState
*cs
, int level
)
910 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
911 CPUPPCState
*env
= &cpu
->env
;
912 struct kvm_regs regs
;
916 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
923 regs
.xer
= cpu_read_xer(env
);
927 regs
.srr0
= env
->spr
[SPR_SRR0
];
928 regs
.srr1
= env
->spr
[SPR_SRR1
];
930 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
931 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
932 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
933 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
934 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
935 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
936 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
937 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
939 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
941 for (i
= 0;i
< 32; i
++)
942 regs
.gpr
[i
] = env
->gpr
[i
];
945 for (i
= 0; i
< 8; i
++) {
946 regs
.cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
949 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
955 if (env
->tlb_dirty
) {
957 env
->tlb_dirty
= false;
960 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
961 ret
= kvmppc_put_books_sregs(cpu
);
967 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
968 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
974 /* We deliberately ignore errors here, for kernels which have
975 * the ONE_REG calls, but don't support the specific
976 * registers, there's a reasonable chance things will still
977 * work, at least until we try to migrate. */
978 for (i
= 0; i
< 1024; i
++) {
979 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
982 kvm_put_one_spr(cs
, id
, i
);
988 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
989 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
991 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
992 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
994 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
995 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
996 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
997 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
998 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
999 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1000 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1001 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1002 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1003 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1007 if (kvm_put_vpa(cs
) < 0) {
1008 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1012 kvm_set_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1013 #endif /* TARGET_PPC64 */
1019 static void kvm_sync_excp(CPUPPCState
*env
, int vector
, int ivor
)
1021 env
->excp_vectors
[vector
] = env
->spr
[ivor
] + env
->spr
[SPR_BOOKE_IVPR
];
1024 static int kvmppc_get_booke_sregs(PowerPCCPU
*cpu
)
1026 CPUPPCState
*env
= &cpu
->env
;
1027 struct kvm_sregs sregs
;
1030 ret
= kvm_vcpu_ioctl(CPU(cpu
), KVM_GET_SREGS
, &sregs
);
1035 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
1036 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
1037 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
1038 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
1039 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
1040 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
1041 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
1042 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
1043 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
1044 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
1045 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
1046 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
1049 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
1050 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
1051 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
1052 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
1053 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
1054 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
1057 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
1058 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
1061 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
1062 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
1065 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
1066 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
1067 kvm_sync_excp(env
, POWERPC_EXCP_CRITICAL
, SPR_BOOKE_IVOR0
);
1068 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
1069 kvm_sync_excp(env
, POWERPC_EXCP_MCHECK
, SPR_BOOKE_IVOR1
);
1070 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
1071 kvm_sync_excp(env
, POWERPC_EXCP_DSI
, SPR_BOOKE_IVOR2
);
1072 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
1073 kvm_sync_excp(env
, POWERPC_EXCP_ISI
, SPR_BOOKE_IVOR3
);
1074 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
1075 kvm_sync_excp(env
, POWERPC_EXCP_EXTERNAL
, SPR_BOOKE_IVOR4
);
1076 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
1077 kvm_sync_excp(env
, POWERPC_EXCP_ALIGN
, SPR_BOOKE_IVOR5
);
1078 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
1079 kvm_sync_excp(env
, POWERPC_EXCP_PROGRAM
, SPR_BOOKE_IVOR6
);
1080 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
1081 kvm_sync_excp(env
, POWERPC_EXCP_FPU
, SPR_BOOKE_IVOR7
);
1082 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
1083 kvm_sync_excp(env
, POWERPC_EXCP_SYSCALL
, SPR_BOOKE_IVOR8
);
1084 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
1085 kvm_sync_excp(env
, POWERPC_EXCP_APU
, SPR_BOOKE_IVOR9
);
1086 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
1087 kvm_sync_excp(env
, POWERPC_EXCP_DECR
, SPR_BOOKE_IVOR10
);
1088 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
1089 kvm_sync_excp(env
, POWERPC_EXCP_FIT
, SPR_BOOKE_IVOR11
);
1090 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
1091 kvm_sync_excp(env
, POWERPC_EXCP_WDT
, SPR_BOOKE_IVOR12
);
1092 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
1093 kvm_sync_excp(env
, POWERPC_EXCP_DTLB
, SPR_BOOKE_IVOR13
);
1094 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
1095 kvm_sync_excp(env
, POWERPC_EXCP_ITLB
, SPR_BOOKE_IVOR14
);
1096 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
1097 kvm_sync_excp(env
, POWERPC_EXCP_DEBUG
, SPR_BOOKE_IVOR15
);
1099 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
1100 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
1101 kvm_sync_excp(env
, POWERPC_EXCP_SPEU
, SPR_BOOKE_IVOR32
);
1102 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
1103 kvm_sync_excp(env
, POWERPC_EXCP_EFPDI
, SPR_BOOKE_IVOR33
);
1104 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
1105 kvm_sync_excp(env
, POWERPC_EXCP_EFPRI
, SPR_BOOKE_IVOR34
);
1108 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
1109 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
1110 kvm_sync_excp(env
, POWERPC_EXCP_EPERFM
, SPR_BOOKE_IVOR35
);
1113 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
1114 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
1115 kvm_sync_excp(env
, POWERPC_EXCP_DOORI
, SPR_BOOKE_IVOR36
);
1116 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
1117 kvm_sync_excp(env
, POWERPC_EXCP_DOORCI
, SPR_BOOKE_IVOR37
);
1121 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
1122 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
1123 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
1124 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
1125 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
1126 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
1127 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
1128 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
1129 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
1130 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
1131 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
1134 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
1135 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1138 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1139 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1140 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1143 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1144 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1145 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1146 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1148 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1149 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1150 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1157 static int kvmppc_get_books_sregs(PowerPCCPU
*cpu
)
1159 CPUPPCState
*env
= &cpu
->env
;
1160 struct kvm_sregs sregs
;
1164 ret
= kvm_vcpu_ioctl(CPU(cpu
), KVM_GET_SREGS
, &sregs
);
1169 if (!env
->external_htab
) {
1170 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1176 * The packed SLB array we get from KVM_GET_SREGS only contains
1177 * information about valid entries. So we flush our internal copy
1178 * to get rid of stale ones, then put all valid SLB entries back
1181 memset(env
->slb
, 0, sizeof(env
->slb
));
1182 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
1183 target_ulong rb
= sregs
.u
.s
.ppc64
.slb
[i
].slbe
;
1184 target_ulong rs
= sregs
.u
.s
.ppc64
.slb
[i
].slbv
;
1186 * Only restore valid entries
1188 if (rb
& SLB_ESID_V
) {
1189 ppc_store_slb(cpu
, rb
& 0xfff, rb
& ~0xfffULL
, rs
);
1195 for (i
= 0; i
< 16; i
++) {
1196 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1200 for (i
= 0; i
< 8; i
++) {
1201 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1202 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1203 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1204 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1210 int kvm_arch_get_registers(CPUState
*cs
)
1212 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1213 CPUPPCState
*env
= &cpu
->env
;
1214 struct kvm_regs regs
;
1218 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
1223 for (i
= 7; i
>= 0; i
--) {
1224 env
->crf
[i
] = cr
& 15;
1228 env
->ctr
= regs
.ctr
;
1230 cpu_write_xer(env
, regs
.xer
);
1231 env
->msr
= regs
.msr
;
1234 env
->spr
[SPR_SRR0
] = regs
.srr0
;
1235 env
->spr
[SPR_SRR1
] = regs
.srr1
;
1237 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
1238 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
1239 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
1240 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
1241 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
1242 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
1243 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
1244 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
1246 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
1248 for (i
= 0;i
< 32; i
++)
1249 env
->gpr
[i
] = regs
.gpr
[i
];
1253 if (cap_booke_sregs
) {
1254 ret
= kvmppc_get_booke_sregs(cpu
);
1261 ret
= kvmppc_get_books_sregs(cpu
);
1268 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1274 /* We deliberately ignore errors here, for kernels which have
1275 * the ONE_REG calls, but don't support the specific
1276 * registers, there's a reasonable chance things will still
1277 * work, at least until we try to migrate. */
1278 for (i
= 0; i
< 1024; i
++) {
1279 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1282 kvm_get_one_spr(cs
, id
, i
);
1288 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
1289 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
1291 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
1292 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
1294 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
1295 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
1296 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
1297 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
1298 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
1299 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1300 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1301 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1302 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1303 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1307 if (kvm_get_vpa(cs
) < 0) {
1308 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1312 kvm_get_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1319 int kvmppc_set_interrupt(PowerPCCPU
*cpu
, int irq
, int level
)
1321 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
1323 if (irq
!= PPC_INTERRUPT_EXT
) {
1327 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
1331 kvm_vcpu_ioctl(CPU(cpu
), KVM_INTERRUPT
, &virq
);
1336 #if defined(TARGET_PPCEMB)
1337 #define PPC_INPUT_INT PPC40x_INPUT_INT
1338 #elif defined(TARGET_PPC64)
1339 #define PPC_INPUT_INT PPC970_INPUT_INT
1341 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1344 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1346 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1347 CPUPPCState
*env
= &cpu
->env
;
1351 qemu_mutex_lock_iothread();
1353 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1354 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1355 if (!cap_interrupt_level
&&
1356 run
->ready_for_interrupt_injection
&&
1357 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1358 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1360 /* For now KVM disregards the 'irq' argument. However, in the
1361 * future KVM could cache it in-kernel to avoid a heavyweight exit
1362 * when reading the UIC.
1364 irq
= KVM_INTERRUPT_SET
;
1366 DPRINTF("injected interrupt %d\n", irq
);
1367 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1369 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1372 /* Always wake up soon in case the interrupt was level based */
1373 timer_mod(idle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
1374 (NANOSECONDS_PER_SECOND
/ 50));
1377 /* We don't know if there are more interrupts pending after this. However,
1378 * the guest will return to userspace in the course of handling this one
1379 * anyways, so we will get a chance to deliver the rest. */
1381 qemu_mutex_unlock_iothread();
1384 MemTxAttrs
kvm_arch_post_run(CPUState
*cs
, struct kvm_run
*run
)
1386 return MEMTXATTRS_UNSPECIFIED
;
1389 int kvm_arch_process_async_events(CPUState
*cs
)
1394 static int kvmppc_handle_halt(PowerPCCPU
*cpu
)
1396 CPUState
*cs
= CPU(cpu
);
1397 CPUPPCState
*env
= &cpu
->env
;
1399 if (!(cs
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
1401 cs
->exception_index
= EXCP_HLT
;
1407 /* map dcr access to existing qemu dcr emulation */
1408 static int kvmppc_handle_dcr_read(CPUPPCState
*env
, uint32_t dcrn
, uint32_t *data
)
1410 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
1411 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
1416 static int kvmppc_handle_dcr_write(CPUPPCState
*env
, uint32_t dcrn
, uint32_t data
)
1418 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
1419 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
1424 int kvm_arch_insert_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1426 /* Mixed endian case is not handled */
1427 uint32_t sc
= debug_inst_opcode
;
1429 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1431 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 1)) {
1438 int kvm_arch_remove_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1442 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 0) ||
1443 sc
!= debug_inst_opcode
||
1444 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1452 static int find_hw_breakpoint(target_ulong addr
, int type
)
1456 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1457 <= ARRAY_SIZE(hw_debug_points
));
1459 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1460 if (hw_debug_points
[n
].addr
== addr
&&
1461 hw_debug_points
[n
].type
== type
) {
1469 static int find_hw_watchpoint(target_ulong addr
, int *flag
)
1473 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_ACCESS
);
1475 *flag
= BP_MEM_ACCESS
;
1479 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_WRITE
);
1481 *flag
= BP_MEM_WRITE
;
1485 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_READ
);
1487 *flag
= BP_MEM_READ
;
1494 int kvm_arch_insert_hw_breakpoint(target_ulong addr
,
1495 target_ulong len
, int type
)
1497 if ((nb_hw_breakpoint
+ nb_hw_watchpoint
) >= ARRAY_SIZE(hw_debug_points
)) {
1501 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].addr
= addr
;
1502 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].type
= type
;
1505 case GDB_BREAKPOINT_HW
:
1506 if (nb_hw_breakpoint
>= max_hw_breakpoint
) {
1510 if (find_hw_breakpoint(addr
, type
) >= 0) {
1517 case GDB_WATCHPOINT_WRITE
:
1518 case GDB_WATCHPOINT_READ
:
1519 case GDB_WATCHPOINT_ACCESS
:
1520 if (nb_hw_watchpoint
>= max_hw_watchpoint
) {
1524 if (find_hw_breakpoint(addr
, type
) >= 0) {
1538 int kvm_arch_remove_hw_breakpoint(target_ulong addr
,
1539 target_ulong len
, int type
)
1543 n
= find_hw_breakpoint(addr
, type
);
1549 case GDB_BREAKPOINT_HW
:
1553 case GDB_WATCHPOINT_WRITE
:
1554 case GDB_WATCHPOINT_READ
:
1555 case GDB_WATCHPOINT_ACCESS
:
1562 hw_debug_points
[n
] = hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
];
1567 void kvm_arch_remove_all_hw_breakpoints(void)
1569 nb_hw_breakpoint
= nb_hw_watchpoint
= 0;
1572 void kvm_arch_update_guest_debug(CPUState
*cs
, struct kvm_guest_debug
*dbg
)
1576 /* Software Breakpoint updates */
1577 if (kvm_sw_breakpoints_active(cs
)) {
1578 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_SW_BP
;
1581 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1582 <= ARRAY_SIZE(hw_debug_points
));
1583 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
) <= ARRAY_SIZE(dbg
->arch
.bp
));
1585 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1586 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_HW_BP
;
1587 memset(dbg
->arch
.bp
, 0, sizeof(dbg
->arch
.bp
));
1588 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1589 switch (hw_debug_points
[n
].type
) {
1590 case GDB_BREAKPOINT_HW
:
1591 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_BREAKPOINT
;
1593 case GDB_WATCHPOINT_WRITE
:
1594 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
;
1596 case GDB_WATCHPOINT_READ
:
1597 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_READ
;
1599 case GDB_WATCHPOINT_ACCESS
:
1600 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
|
1601 KVMPPC_DEBUG_WATCH_READ
;
1604 cpu_abort(cs
, "Unsupported breakpoint type\n");
1606 dbg
->arch
.bp
[n
].addr
= hw_debug_points
[n
].addr
;
1611 static int kvm_handle_debug(PowerPCCPU
*cpu
, struct kvm_run
*run
)
1613 CPUState
*cs
= CPU(cpu
);
1614 CPUPPCState
*env
= &cpu
->env
;
1615 struct kvm_debug_exit_arch
*arch_info
= &run
->debug
.arch
;
1620 if (cs
->singlestep_enabled
) {
1622 } else if (arch_info
->status
) {
1623 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1624 if (arch_info
->status
& KVMPPC_DEBUG_BREAKPOINT
) {
1625 n
= find_hw_breakpoint(arch_info
->address
, GDB_BREAKPOINT_HW
);
1629 } else if (arch_info
->status
& (KVMPPC_DEBUG_WATCH_READ
|
1630 KVMPPC_DEBUG_WATCH_WRITE
)) {
1631 n
= find_hw_watchpoint(arch_info
->address
, &flag
);
1634 cs
->watchpoint_hit
= &hw_watchpoint
;
1635 hw_watchpoint
.vaddr
= hw_debug_points
[n
].addr
;
1636 hw_watchpoint
.flags
= flag
;
1640 } else if (kvm_find_sw_breakpoint(cs
, arch_info
->address
)) {
1643 /* QEMU is not able to handle debug exception, so inject
1644 * program exception to guest;
1645 * Yes program exception NOT debug exception !!
1646 * When QEMU is using debug resources then debug exception must
1647 * be always set. To achieve this we set MSR_DE and also set
1648 * MSRP_DEP so guest cannot change MSR_DE.
1649 * When emulating debug resource for guest we want guest
1650 * to control MSR_DE (enable/disable debug interrupt on need).
1651 * Supporting both configurations are NOT possible.
1652 * So the result is that we cannot share debug resources
1653 * between QEMU and Guest on BOOKE architecture.
1654 * In the current design QEMU gets the priority over guest,
1655 * this means that if QEMU is using debug resources then guest
1657 * For software breakpoint QEMU uses a privileged instruction;
1658 * So there cannot be any reason that we are here for guest
1659 * set debug exception, only possibility is guest executed a
1660 * privileged / illegal instruction and that's why we are
1661 * injecting a program interrupt.
1664 cpu_synchronize_state(cs
);
1665 /* env->nip is PC, so increment this by 4 to use
1666 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1669 cs
->exception_index
= POWERPC_EXCP_PROGRAM
;
1670 env
->error_code
= POWERPC_EXCP_INVAL
;
1671 ppc_cpu_do_interrupt(cs
);
1677 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1679 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1680 CPUPPCState
*env
= &cpu
->env
;
1683 qemu_mutex_lock_iothread();
1685 switch (run
->exit_reason
) {
1687 if (run
->dcr
.is_write
) {
1688 DPRINTF("handle dcr write\n");
1689 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1691 DPRINTF("handle dcr read\n");
1692 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
1696 DPRINTF("handle halt\n");
1697 ret
= kvmppc_handle_halt(cpu
);
1699 #if defined(TARGET_PPC64)
1700 case KVM_EXIT_PAPR_HCALL
:
1701 DPRINTF("handle PAPR hypercall\n");
1702 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1704 run
->papr_hcall
.args
);
1709 DPRINTF("handle epr\n");
1710 run
->epr
.epr
= ldl_phys(cs
->as
, env
->mpic_iack
);
1713 case KVM_EXIT_WATCHDOG
:
1714 DPRINTF("handle watchdog expiry\n");
1715 watchdog_perform_action();
1719 case KVM_EXIT_DEBUG
:
1720 DPRINTF("handle debug exception\n");
1721 if (kvm_handle_debug(cpu
, run
)) {
1725 /* re-enter, this exception was guest-internal */
1730 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1735 qemu_mutex_unlock_iothread();
1739 int kvmppc_or_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1741 CPUState
*cs
= CPU(cpu
);
1742 uint32_t bits
= tsr_bits
;
1743 struct kvm_one_reg reg
= {
1744 .id
= KVM_REG_PPC_OR_TSR
,
1745 .addr
= (uintptr_t) &bits
,
1748 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1751 int kvmppc_clear_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1754 CPUState
*cs
= CPU(cpu
);
1755 uint32_t bits
= tsr_bits
;
1756 struct kvm_one_reg reg
= {
1757 .id
= KVM_REG_PPC_CLEAR_TSR
,
1758 .addr
= (uintptr_t) &bits
,
1761 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1764 int kvmppc_set_tcr(PowerPCCPU
*cpu
)
1766 CPUState
*cs
= CPU(cpu
);
1767 CPUPPCState
*env
= &cpu
->env
;
1768 uint32_t tcr
= env
->spr
[SPR_BOOKE_TCR
];
1770 struct kvm_one_reg reg
= {
1771 .id
= KVM_REG_PPC_TCR
,
1772 .addr
= (uintptr_t) &tcr
,
1775 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1778 int kvmppc_booke_watchdog_enable(PowerPCCPU
*cpu
)
1780 CPUState
*cs
= CPU(cpu
);
1783 if (!kvm_enabled()) {
1787 if (!cap_ppc_watchdog
) {
1788 printf("warning: KVM does not support watchdog");
1792 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_BOOKE_WATCHDOG
, 0);
1794 fprintf(stderr
, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1795 __func__
, strerror(-ret
));
1802 static int read_cpuinfo(const char *field
, char *value
, int len
)
1806 int field_len
= strlen(field
);
1809 f
= fopen("/proc/cpuinfo", "r");
1815 if (!fgets(line
, sizeof(line
), f
)) {
1818 if (!strncmp(line
, field
, field_len
)) {
1819 pstrcpy(value
, len
, line
);
1830 uint32_t kvmppc_get_tbfreq(void)
1834 uint32_t retval
= NANOSECONDS_PER_SECOND
;
1836 if (read_cpuinfo("timebase", line
, sizeof(line
))) {
1840 if (!(ns
= strchr(line
, ':'))) {
1849 bool kvmppc_get_host_serial(char **value
)
1851 return g_file_get_contents("/proc/device-tree/system-id", value
, NULL
,
1855 bool kvmppc_get_host_model(char **value
)
1857 return g_file_get_contents("/proc/device-tree/model", value
, NULL
, NULL
);
1860 /* Try to find a device tree node for a CPU with clock-frequency property */
1861 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1863 struct dirent
*dirp
;
1866 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1867 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1872 while ((dirp
= readdir(dp
)) != NULL
) {
1874 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1876 f
= fopen(buf
, "r");
1878 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
1885 if (buf
[0] == '\0') {
1886 printf("Unknown host!\n");
1893 static uint64_t kvmppc_read_int_dt(const char *filename
)
1902 f
= fopen(filename
, "rb");
1907 len
= fread(&u
, 1, sizeof(u
), f
);
1911 /* property is a 32-bit quantity */
1912 return be32_to_cpu(u
.v32
);
1914 return be64_to_cpu(u
.v64
);
1920 /* Read a CPU node property from the host device tree that's a single
1921 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1922 * (can't find or open the property, or doesn't understand the
1924 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
1926 char buf
[PATH_MAX
], *tmp
;
1929 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
1933 tmp
= g_strdup_printf("%s/%s", buf
, propname
);
1934 val
= kvmppc_read_int_dt(tmp
);
/* Host CPU clock frequency from the device tree, 0 on failure. */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
/* Host "ibm,vmx" device-tree property (Altivec/VSX level), 0 on failure. */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}
/* Host "ibm,dfp" device-tree property (decimal FP support), 0 on failure. */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
1955 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
1957 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
1958 CPUState
*cs
= CPU(cpu
);
1960 if (kvm_vm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
1961 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
1968 int kvmppc_get_hasidle(CPUPPCState
*env
)
1970 struct kvm_ppc_pvinfo pvinfo
;
1972 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
1973 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
1980 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
1982 uint32_t *hc
= (uint32_t*)buf
;
1983 struct kvm_ppc_pvinfo pvinfo
;
1985 if (!kvmppc_get_pvinfo(env
, &pvinfo
)) {
1986 memcpy(buf
, pvinfo
.hcall
, buf_len
);
1991 * Fallback to always fail hypercalls regardless of endianness:
1993 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1995 * b .+8 (becomes nop in wrong endian)
1996 * bswap32(li r3, -1)
1999 hc
[0] = cpu_to_be32(0x08000048);
2000 hc
[1] = cpu_to_be32(0x3860ffff);
2001 hc
[2] = cpu_to_be32(0x48000008);
2002 hc
[3] = cpu_to_be32(bswap32(0x3860ffff));
2007 static inline int kvmppc_enable_hcall(KVMState
*s
, target_ulong hcall
)
2009 return kvm_vm_enable_cap(s
, KVM_CAP_PPC_ENABLE_HCALL
, 0, hcall
, 1);
2012 void kvmppc_enable_logical_ci_hcalls(void)
2015 * FIXME: it would be nice if we could detect the cases where
2016 * we're using a device which requires the in kernel
2017 * implementation of these hcalls, but the kernel lacks them and
2018 * produce a warning.
2020 kvmppc_enable_hcall(kvm_state
, H_LOGICAL_CI_LOAD
);
2021 kvmppc_enable_hcall(kvm_state
, H_LOGICAL_CI_STORE
);
2024 void kvmppc_enable_set_mode_hcall(void)
2026 kvmppc_enable_hcall(kvm_state
, H_SET_MODE
);
2029 void kvmppc_set_papr(PowerPCCPU
*cpu
)
2031 CPUState
*cs
= CPU(cpu
);
2034 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_PAPR
, 0);
2036 error_report("This vCPU type or KVM version does not support PAPR");
2040 /* Update the capability flag so we sync the right information
2045 int kvmppc_set_compat(PowerPCCPU
*cpu
, uint32_t cpu_version
)
2047 return kvm_set_one_reg(CPU(cpu
), KVM_REG_PPC_ARCH_COMPAT
, &cpu_version
);
2050 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
2052 CPUState
*cs
= CPU(cpu
);
2055 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_EPR
, 0, mpic_proxy
);
2056 if (ret
&& mpic_proxy
) {
2057 error_report("This KVM version does not support EPR");
2062 int kvmppc_smt_threads(void)
2064 return cap_ppc_smt
? cap_ppc_smt
: 1;
2068 off_t
kvmppc_alloc_rma(void **rma
)
2072 struct kvm_allocate_rma ret
;
2074 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2075 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2076 * not necessary on this hardware
2077 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2079 * FIXME: We should allow the user to force contiguous RMA
2080 * allocation in the cap_ppc_rma==1 case.
2082 if (cap_ppc_rma
< 2) {
2086 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
2088 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2093 size
= MIN(ret
.rma_size
, 256ul << 20);
2095 *rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2096 if (*rma
== MAP_FAILED
) {
2097 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
2104 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
2106 struct kvm_ppc_smmu_info info
;
2107 long rampagesize
, best_page_shift
;
2110 if (cap_ppc_rma
>= 2) {
2111 return current_size
;
2114 /* Find the largest hardware supported page size that's less than
2115 * or equal to the (logical) backing page size of guest RAM */
2116 kvm_get_smmu_info(POWERPC_CPU(first_cpu
), &info
);
2117 rampagesize
= getrampagesize();
2118 best_page_shift
= 0;
2120 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
2121 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
2123 if (!sps
->page_shift
) {
2127 if ((sps
->page_shift
> best_page_shift
)
2128 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
2129 best_page_shift
= sps
->page_shift
;
2133 return MIN(current_size
,
2134 1ULL << (best_page_shift
+ hash_shift
- 7));
2138 bool kvmppc_spapr_use_multitce(void)
2140 return cap_spapr_multitce
;
2143 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
,
2146 struct kvm_create_spapr_tce args
= {
2148 .window_size
= window_size
,
2154 /* Must set fd to -1 so we don't try to munmap when called for
2155 * destroying the table, which the upper layers -will- do
2158 if (!cap_spapr_tce
|| (need_vfio
&& !cap_spapr_vfio
)) {
2162 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
2164 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
2169 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(uint64_t);
2170 /* FIXME: round this up to page size */
2172 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2173 if (table
== MAP_FAILED
) {
2174 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
2184 int kvmppc_remove_spapr_tce(void *table
, int fd
, uint32_t nb_table
)
2192 len
= nb_table
* sizeof(uint64_t);
2193 if ((munmap(table
, len
) < 0) ||
2195 fprintf(stderr
, "KVM: Unexpected error removing TCE table: %s",
2197 /* Leak the table */
2203 int kvmppc_reset_htab(int shift_hint
)
2205 uint32_t shift
= shift_hint
;
2207 if (!kvm_enabled()) {
2208 /* Full emulation, tell caller to allocate htab itself */
2211 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
2213 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
2214 if (ret
== -ENOTTY
) {
2215 /* At least some versions of PR KVM advertise the
2216 * capability, but don't implement the ioctl(). Oops.
2217 * Return 0 so that we allocate the htab in qemu, as is
2218 * correct for PR. */
2220 } else if (ret
< 0) {
2226 /* We have a kernel that predates the htab reset calls. For PR
2227 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2228 * this era, it has allocated a 16MB fixed size hash table
2229 * already. Kernels of this era have the GET_PVINFO capability
2230 * only on PR, so we use this hack to determine the right
2232 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
2233 /* PR - tell caller to allocate htab */
2236 /* HV - assume 16MB kernel allocated htab */
2241 static inline uint32_t mfpvr(void)
2250 static void alter_insns(uint64_t *word
, uint64_t flags
, bool on
)
2259 static void kvmppc_host_cpu_initfn(Object
*obj
)
2261 assert(kvm_enabled());
2264 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
2266 DeviceClass
*dc
= DEVICE_CLASS(oc
);
2267 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
2268 uint32_t vmx
= kvmppc_get_vmx();
2269 uint32_t dfp
= kvmppc_get_dfp();
2270 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
2271 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
2273 /* Now fix up the class with information we can query from the host */
2277 /* Only override when we know what the host supports */
2278 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
2279 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
2282 /* Only override when we know what the host supports */
2283 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
2286 if (dcache_size
!= -1) {
2287 pcc
->l1_dcache_size
= dcache_size
;
2290 if (icache_size
!= -1) {
2291 pcc
->l1_icache_size
= icache_size
;
2294 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2295 dc
->cannot_destroy_with_object_finalize_yet
= true;
2298 bool kvmppc_has_cap_epr(void)
2303 bool kvmppc_has_cap_htab_fd(void)
2308 bool kvmppc_has_cap_fixup_hcalls(void)
2310 return cap_fixup_hcalls
;
2313 static PowerPCCPUClass
*ppc_cpu_get_family_class(PowerPCCPUClass
*pcc
)
2315 ObjectClass
*oc
= OBJECT_CLASS(pcc
);
2317 while (oc
&& !object_class_is_abstract(oc
)) {
2318 oc
= object_class_get_parent(oc
);
2322 return POWERPC_CPU_CLASS(oc
);
2325 static int kvm_ppc_register_host_cpu_type(void)
2327 TypeInfo type_info
= {
2328 .name
= TYPE_HOST_POWERPC_CPU
,
2329 .instance_init
= kvmppc_host_cpu_initfn
,
2330 .class_init
= kvmppc_host_cpu_class_init
,
2332 uint32_t host_pvr
= mfpvr();
2333 PowerPCCPUClass
*pvr_pcc
;
2336 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
2337 if (pvr_pcc
== NULL
) {
2338 pvr_pcc
= ppc_cpu_class_by_pvr_mask(host_pvr
);
2340 if (pvr_pcc
== NULL
) {
2343 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2344 type_register(&type_info
);
2346 /* Register generic family CPU class for a family */
2347 pvr_pcc
= ppc_cpu_get_family_class(pvr_pcc
);
2348 dc
= DEVICE_CLASS(pvr_pcc
);
2349 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2350 type_info
.name
= g_strdup_printf("%s-"TYPE_POWERPC_CPU
, dc
->desc
);
2351 type_register(&type_info
);
2356 int kvmppc_define_rtas_kernel_token(uint32_t token
, const char *function
)
2358 struct kvm_rtas_token_args args
= {
2362 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_RTAS
)) {
2366 strncpy(args
.name
, function
, sizeof(args
.name
));
2368 return kvm_vm_ioctl(kvm_state
, KVM_PPC_RTAS_DEFINE_TOKEN
, &args
);
2371 int kvmppc_get_htab_fd(bool write
)
2373 struct kvm_get_htab_fd s
= {
2374 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
2379 fprintf(stderr
, "KVM version doesn't support saving the hash table\n");
2383 return kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
2386 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
2388 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
2389 uint8_t buf
[bufsize
];
2393 rc
= read(fd
, buf
, bufsize
);
2395 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
2399 uint8_t *buffer
= buf
;
2402 struct kvm_get_htab_header
*head
=
2403 (struct kvm_get_htab_header
*) buffer
;
2404 size_t chunksize
= sizeof(*head
) +
2405 HASH_PTE_SIZE_64
* head
->n_valid
;
2407 qemu_put_be32(f
, head
->index
);
2408 qemu_put_be16(f
, head
->n_valid
);
2409 qemu_put_be16(f
, head
->n_invalid
);
2410 qemu_put_buffer(f
, (void *)(head
+ 1),
2411 HASH_PTE_SIZE_64
* head
->n_valid
);
2413 buffer
+= chunksize
;
2419 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
2421 return (rc
== 0) ? 1 : 0;
2424 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
2425 uint16_t n_valid
, uint16_t n_invalid
)
2427 struct kvm_get_htab_header
*buf
;
2428 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
2431 buf
= alloca(chunksize
);
2433 buf
->n_valid
= n_valid
;
2434 buf
->n_invalid
= n_invalid
;
2436 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
2438 rc
= write(fd
, buf
, chunksize
);
2440 fprintf(stderr
, "Error writing KVM hash table: %s\n",
2444 if (rc
!= chunksize
) {
2445 /* We should never get a short write on a single chunk */
2446 fprintf(stderr
, "Short write, restoring KVM hash table\n");
2452 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
2457 int kvm_arch_on_sigbus_vcpu(CPUState
*cpu
, int code
, void *addr
)
2462 int kvm_arch_on_sigbus(int code
, void *addr
)
2467 void kvm_arch_init_irq_routing(KVMState
*s
)
2471 struct kvm_get_htab_buf
{
2472 struct kvm_get_htab_header header
;
2474 * We require one extra byte for read
2476 target_ulong hpte
[(HPTES_PER_GROUP
* 2) + 1];
2479 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU
*cpu
, target_ulong pte_index
)
2482 struct kvm_get_htab_fd ghf
;
2483 struct kvm_get_htab_buf
*hpte_buf
;
2486 ghf
.start_index
= pte_index
;
2487 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2492 hpte_buf
= g_malloc0(sizeof(*hpte_buf
));
2494 * Read the hpte group
2496 if (read(htab_fd
, hpte_buf
, sizeof(*hpte_buf
)) < 0) {
2501 return (uint64_t)(uintptr_t) hpte_buf
->hpte
;
2510 void kvmppc_hash64_free_pteg(uint64_t token
)
2512 struct kvm_get_htab_buf
*htab_buf
;
2514 htab_buf
= container_of((void *)(uintptr_t) token
, struct kvm_get_htab_buf
,
2520 void kvmppc_hash64_write_pte(CPUPPCState
*env
, target_ulong pte_index
,
2521 target_ulong pte0
, target_ulong pte1
)
2524 struct kvm_get_htab_fd ghf
;
2525 struct kvm_get_htab_buf hpte_buf
;
2528 ghf
.start_index
= 0; /* Ignored */
2529 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2534 hpte_buf
.header
.n_valid
= 1;
2535 hpte_buf
.header
.n_invalid
= 0;
2536 hpte_buf
.header
.index
= pte_index
;
2537 hpte_buf
.hpte
[0] = pte0
;
2538 hpte_buf
.hpte
[1] = pte1
;
2540 * Write the hpte entry.
2541 * CAUTION: write() has the warn_unused_result attribute. Hence we
2542 * need to check the return value, even though we do nothing.
2544 if (write(htab_fd
, &hpte_buf
, sizeof(hpte_buf
)) < 0) {
2556 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry
*route
,
2557 uint64_t address
, uint32_t data
, PCIDevice
*dev
)
/* Extract the GSI number from MSI data: the low 16 bits carry it. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}
2567 int kvmppc_enable_hwrng(void)
2569 if (!kvm_enabled() || !kvm_check_extension(kvm_state
, KVM_CAP_PPC_HWRNG
)) {
2573 return kvmppc_enable_hcall(kvm_state
, H_RANDOM
);