2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "sysemu/watchdog.h"
44 #define DPRINTF(fmt, ...) \
45 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
47 #define DPRINTF(fmt, ...) \
51 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
53 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
57 static int cap_interrupt_unset
= false;
58 static int cap_interrupt_level
= false;
59 static int cap_segstate
;
60 static int cap_booke_sregs
;
61 static int cap_ppc_smt
;
62 static int cap_ppc_rma
;
63 static int cap_spapr_tce
;
65 static int cap_one_reg
;
67 static int cap_ppc_watchdog
;
69 static int cap_htab_fd
;
71 /* XXX We have a race condition where we actually have a level triggered
72 * interrupt, but the infrastructure can't expose that yet, so the guest
73 * takes but ignores it, goes to sleep and never gets notified that there's
74 * still an interrupt pending.
76 * As a quick workaround, let's just wake up again 20 ms after we injected
77 * an interrupt. That way we can assure that we're always reinjecting
78 * interrupts in case the guest swallowed them.
80 static QEMUTimer
*idle_timer
;
82 static void kvm_kick_cpu(void *opaque
)
84 PowerPCCPU
*cpu
= opaque
;
86 qemu_cpu_kick(CPU(cpu
));
89 static int kvm_ppc_register_host_cpu_type(void);
91 int kvm_arch_init(KVMState
*s
)
93 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
94 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
95 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
96 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
97 cap_ppc_smt
= kvm_check_extension(s
, KVM_CAP_PPC_SMT
);
98 cap_ppc_rma
= kvm_check_extension(s
, KVM_CAP_PPC_RMA
);
99 cap_spapr_tce
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE
);
100 cap_one_reg
= kvm_check_extension(s
, KVM_CAP_ONE_REG
);
101 cap_hior
= kvm_check_extension(s
, KVM_CAP_PPC_HIOR
);
102 cap_epr
= kvm_check_extension(s
, KVM_CAP_PPC_EPR
);
103 cap_ppc_watchdog
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_WATCHDOG
);
104 /* Note: we don't set cap_papr here, because this capability is
105 * only activated after this by kvmppc_set_papr() */
106 cap_htab_fd
= kvm_check_extension(s
, KVM_CAP_PPC_HTAB_FD
);
108 if (!cap_interrupt_level
) {
109 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
110 "VM to stall at times!\n");
113 kvm_ppc_register_host_cpu_type();
118 static int kvm_arch_sync_sregs(PowerPCCPU
*cpu
)
120 CPUPPCState
*cenv
= &cpu
->env
;
121 CPUState
*cs
= CPU(cpu
);
122 struct kvm_sregs sregs
;
125 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
126 /* What we're really trying to say is "if we're on BookE, we use
127 the native PVR for now". This is the only sane way to check
128 it though, so we potentially confuse users that they can run
129 BookE guests on BookS. Let's hope nobody dares enough :) */
133 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
138 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
143 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
144 return kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
147 /* Set up a shared TLB array with KVM */
148 static int kvm_booke206_tlb_init(PowerPCCPU
*cpu
)
150 CPUPPCState
*env
= &cpu
->env
;
151 CPUState
*cs
= CPU(cpu
);
152 struct kvm_book3e_206_tlb_params params
= {};
153 struct kvm_config_tlb cfg
= {};
154 unsigned int entries
= 0;
157 if (!kvm_enabled() ||
158 !kvm_check_extension(cs
->kvm_state
, KVM_CAP_SW_TLB
)) {
162 assert(ARRAY_SIZE(params
.tlb_sizes
) == BOOKE206_MAX_TLBN
);
164 for (i
= 0; i
< BOOKE206_MAX_TLBN
; i
++) {
165 params
.tlb_sizes
[i
] = booke206_tlb_size(env
, i
);
166 params
.tlb_ways
[i
] = booke206_tlb_ways(env
, i
);
167 entries
+= params
.tlb_sizes
[i
];
170 assert(entries
== env
->nb_tlb
);
171 assert(sizeof(struct kvm_book3e_206_tlb_entry
) == sizeof(ppcmas_tlb_t
));
173 env
->tlb_dirty
= true;
175 cfg
.array
= (uintptr_t)env
->tlb
.tlbm
;
176 cfg
.array_len
= sizeof(ppcmas_tlb_t
) * entries
;
177 cfg
.params
= (uintptr_t)¶ms
;
178 cfg
.mmu_type
= KVM_MMU_FSL_BOOKE_NOHV
;
180 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_SW_TLB
, 0, (uintptr_t)&cfg
);
182 fprintf(stderr
, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
183 __func__
, strerror(-ret
));
187 env
->kvm_sw_tlb
= true;
192 #if defined(TARGET_PPC64)
193 static void kvm_get_fallback_smmu_info(PowerPCCPU
*cpu
,
194 struct kvm_ppc_smmu_info
*info
)
196 CPUPPCState
*env
= &cpu
->env
;
197 CPUState
*cs
= CPU(cpu
);
199 memset(info
, 0, sizeof(*info
));
201 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
202 * need to "guess" what the supported page sizes are.
204 * For that to work we make a few assumptions:
206 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
207 * KVM which only supports 4K and 16M pages, but supports them
208 * regardless of the backing store characteritics. We also don't
209 * support 1T segments.
211 * This is safe as if HV KVM ever supports that capability or PR
212 * KVM grows supports for more page/segment sizes, those versions
213 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
214 * will not hit this fallback
216 * - Else we are running HV KVM. This means we only support page
217 * sizes that fit in the backing store. Additionally we only
218 * advertize 64K pages if the processor is ARCH 2.06 and we assume
219 * P7 encodings for the SLB and hash table. Here too, we assume
220 * support for any newer processor will mean a kernel that
221 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
224 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
229 /* Standard 4k base page size segment */
230 info
->sps
[0].page_shift
= 12;
231 info
->sps
[0].slb_enc
= 0;
232 info
->sps
[0].enc
[0].page_shift
= 12;
233 info
->sps
[0].enc
[0].pte_enc
= 0;
235 /* Standard 16M large page size segment */
236 info
->sps
[1].page_shift
= 24;
237 info
->sps
[1].slb_enc
= SLB_VSID_L
;
238 info
->sps
[1].enc
[0].page_shift
= 24;
239 info
->sps
[1].enc
[0].pte_enc
= 0;
243 /* HV KVM has backing store size restrictions */
244 info
->flags
= KVM_PPC_PAGE_SIZES_REAL
;
246 if (env
->mmu_model
& POWERPC_MMU_1TSEG
) {
247 info
->flags
|= KVM_PPC_1T_SEGMENTS
;
250 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
256 /* Standard 4k base page size segment */
257 info
->sps
[i
].page_shift
= 12;
258 info
->sps
[i
].slb_enc
= 0;
259 info
->sps
[i
].enc
[0].page_shift
= 12;
260 info
->sps
[i
].enc
[0].pte_enc
= 0;
263 /* 64K on MMU 2.06 */
264 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
265 info
->sps
[i
].page_shift
= 16;
266 info
->sps
[i
].slb_enc
= 0x110;
267 info
->sps
[i
].enc
[0].page_shift
= 16;
268 info
->sps
[i
].enc
[0].pte_enc
= 1;
272 /* Standard 16M large page size segment */
273 info
->sps
[i
].page_shift
= 24;
274 info
->sps
[i
].slb_enc
= SLB_VSID_L
;
275 info
->sps
[i
].enc
[0].page_shift
= 24;
276 info
->sps
[i
].enc
[0].pte_enc
= 0;
280 static void kvm_get_smmu_info(PowerPCCPU
*cpu
, struct kvm_ppc_smmu_info
*info
)
282 CPUState
*cs
= CPU(cpu
);
285 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
286 ret
= kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
292 kvm_get_fallback_smmu_info(cpu
, info
);
295 static long getrampagesize(void)
301 /* guest RAM is backed by normal anonymous pages */
302 return getpagesize();
306 ret
= statfs(mem_path
, &fs
);
307 } while (ret
!= 0 && errno
== EINTR
);
310 fprintf(stderr
, "Couldn't statfs() memory path: %s\n",
315 #define HUGETLBFS_MAGIC 0x958458f6
317 if (fs
.f_type
!= HUGETLBFS_MAGIC
) {
318 /* Explicit mempath, but it's ordinary pages */
319 return getpagesize();
322 /* It's hugepage, return the huge page size */
326 static bool kvm_valid_page_size(uint32_t flags
, long rampgsize
, uint32_t shift
)
328 if (!(flags
& KVM_PPC_PAGE_SIZES_REAL
)) {
332 return (1ul << shift
) <= rampgsize
;
335 static void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
337 static struct kvm_ppc_smmu_info smmu_info
;
338 static bool has_smmu_info
;
339 CPUPPCState
*env
= &cpu
->env
;
343 /* We only handle page sizes for 64-bit server guests for now */
344 if (!(env
->mmu_model
& POWERPC_MMU_64
)) {
348 /* Collect MMU info from kernel if not already */
349 if (!has_smmu_info
) {
350 kvm_get_smmu_info(cpu
, &smmu_info
);
351 has_smmu_info
= true;
354 rampagesize
= getrampagesize();
356 /* Convert to QEMU form */
357 memset(&env
->sps
, 0, sizeof(env
->sps
));
360 * XXX This loop should be an entry wide AND of the capabilities that
361 * the selected CPU has with the capabilities that KVM supports.
363 for (ik
= iq
= 0; ik
< KVM_PPC_PAGE_SIZES_MAX_SZ
; ik
++) {
364 struct ppc_one_seg_page_size
*qsps
= &env
->sps
.sps
[iq
];
365 struct kvm_ppc_one_seg_page_size
*ksps
= &smmu_info
.sps
[ik
];
367 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
371 qsps
->page_shift
= ksps
->page_shift
;
372 qsps
->slb_enc
= ksps
->slb_enc
;
373 for (jk
= jq
= 0; jk
< KVM_PPC_PAGE_SIZES_MAX_SZ
; jk
++) {
374 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
375 ksps
->enc
[jk
].page_shift
)) {
378 qsps
->enc
[jq
].page_shift
= ksps
->enc
[jk
].page_shift
;
379 qsps
->enc
[jq
].pte_enc
= ksps
->enc
[jk
].pte_enc
;
380 if (++jq
>= PPC_PAGE_SIZES_MAX_SZ
) {
384 if (++iq
>= PPC_PAGE_SIZES_MAX_SZ
) {
388 env
->slb_nr
= smmu_info
.slb_size
;
389 if (!(smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
)) {
390 env
->mmu_model
&= ~POWERPC_MMU_1TSEG
;
393 #else /* defined (TARGET_PPC64) */
395 static inline void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
399 #endif /* !defined (TARGET_PPC64) */
401 unsigned long kvm_arch_vcpu_id(CPUState
*cpu
)
403 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu
));
406 int kvm_arch_init_vcpu(CPUState
*cs
)
408 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
409 CPUPPCState
*cenv
= &cpu
->env
;
412 /* Gather server mmu info from KVM and update the CPU state */
413 kvm_fixup_page_sizes(cpu
);
415 /* Synchronize sregs with kvm */
416 ret
= kvm_arch_sync_sregs(cpu
);
421 idle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
, kvm_kick_cpu
, cpu
);
423 /* Some targets support access to KVM's guest TLB. */
424 switch (cenv
->mmu_model
) {
425 case POWERPC_MMU_BOOKE206
:
426 ret
= kvm_booke206_tlb_init(cpu
);
435 static void kvm_sw_tlb_put(PowerPCCPU
*cpu
)
437 CPUPPCState
*env
= &cpu
->env
;
438 CPUState
*cs
= CPU(cpu
);
439 struct kvm_dirty_tlb dirty_tlb
;
440 unsigned char *bitmap
;
443 if (!env
->kvm_sw_tlb
) {
447 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
448 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
450 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
451 dirty_tlb
.num_dirty
= env
->nb_tlb
;
453 ret
= kvm_vcpu_ioctl(cs
, KVM_DIRTY_TLB
, &dirty_tlb
);
455 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
456 __func__
, strerror(-ret
));
462 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
464 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
465 CPUPPCState
*env
= &cpu
->env
;
470 struct kvm_one_reg reg
= {
472 .addr
= (uintptr_t) &val
,
476 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
478 trace_kvm_failed_spr_get(spr
, strerror(errno
));
480 switch (id
& KVM_REG_SIZE_MASK
) {
481 case KVM_REG_SIZE_U32
:
482 env
->spr
[spr
] = val
.u32
;
485 case KVM_REG_SIZE_U64
:
486 env
->spr
[spr
] = val
.u64
;
490 /* Don't handle this size yet */
496 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
498 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
499 CPUPPCState
*env
= &cpu
->env
;
504 struct kvm_one_reg reg
= {
506 .addr
= (uintptr_t) &val
,
510 switch (id
& KVM_REG_SIZE_MASK
) {
511 case KVM_REG_SIZE_U32
:
512 val
.u32
= env
->spr
[spr
];
515 case KVM_REG_SIZE_U64
:
516 val
.u64
= env
->spr
[spr
];
520 /* Don't handle this size yet */
524 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
526 trace_kvm_failed_spr_set(spr
, strerror(errno
));
530 static int kvm_put_fp(CPUState
*cs
)
532 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
533 CPUPPCState
*env
= &cpu
->env
;
534 struct kvm_one_reg reg
;
538 if (env
->insns_flags
& PPC_FLOAT
) {
539 uint64_t fpscr
= env
->fpscr
;
540 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
542 reg
.id
= KVM_REG_PPC_FPSCR
;
543 reg
.addr
= (uintptr_t)&fpscr
;
544 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
546 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
550 for (i
= 0; i
< 32; i
++) {
553 vsr
[0] = float64_val(env
->fpr
[i
]);
554 vsr
[1] = env
->vsr
[i
];
555 reg
.addr
= (uintptr_t) &vsr
;
556 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
558 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
560 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
567 if (env
->insns_flags
& PPC_ALTIVEC
) {
568 reg
.id
= KVM_REG_PPC_VSCR
;
569 reg
.addr
= (uintptr_t)&env
->vscr
;
570 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
572 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno
));
576 for (i
= 0; i
< 32; i
++) {
577 reg
.id
= KVM_REG_PPC_VR(i
);
578 reg
.addr
= (uintptr_t)&env
->avr
[i
];
579 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
581 DPRINTF("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
590 static int kvm_get_fp(CPUState
*cs
)
592 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
593 CPUPPCState
*env
= &cpu
->env
;
594 struct kvm_one_reg reg
;
598 if (env
->insns_flags
& PPC_FLOAT
) {
600 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
602 reg
.id
= KVM_REG_PPC_FPSCR
;
603 reg
.addr
= (uintptr_t)&fpscr
;
604 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
606 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
612 for (i
= 0; i
< 32; i
++) {
615 reg
.addr
= (uintptr_t) &vsr
;
616 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
618 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
620 DPRINTF("Unable to get %s%d from KVM: %s\n",
621 vsx
? "VSR" : "FPR", i
, strerror(errno
));
624 env
->fpr
[i
] = vsr
[0];
626 env
->vsr
[i
] = vsr
[1];
632 if (env
->insns_flags
& PPC_ALTIVEC
) {
633 reg
.id
= KVM_REG_PPC_VSCR
;
634 reg
.addr
= (uintptr_t)&env
->vscr
;
635 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
637 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno
));
641 for (i
= 0; i
< 32; i
++) {
642 reg
.id
= KVM_REG_PPC_VR(i
);
643 reg
.addr
= (uintptr_t)&env
->avr
[i
];
644 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
646 DPRINTF("Unable to get VR%d from KVM: %s\n",
656 #if defined(TARGET_PPC64)
657 static int kvm_get_vpa(CPUState
*cs
)
659 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
660 CPUPPCState
*env
= &cpu
->env
;
661 struct kvm_one_reg reg
;
664 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
665 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
666 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
668 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno
));
672 assert((uintptr_t)&env
->slb_shadow_size
673 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
674 reg
.id
= KVM_REG_PPC_VPA_SLB
;
675 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
676 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
678 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
683 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
684 reg
.id
= KVM_REG_PPC_VPA_DTL
;
685 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
686 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
688 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
696 static int kvm_put_vpa(CPUState
*cs
)
698 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
699 CPUPPCState
*env
= &cpu
->env
;
700 struct kvm_one_reg reg
;
703 /* SLB shadow or DTL can't be registered unless a master VPA is
704 * registered. That means when restoring state, if a VPA *is*
705 * registered, we need to set that up first. If not, we need to
706 * deregister the others before deregistering the master VPA */
707 assert(env
->vpa_addr
|| !(env
->slb_shadow_addr
|| env
->dtl_addr
));
710 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
711 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
712 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
714 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
719 assert((uintptr_t)&env
->slb_shadow_size
720 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
721 reg
.id
= KVM_REG_PPC_VPA_SLB
;
722 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
723 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
725 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
729 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
730 reg
.id
= KVM_REG_PPC_VPA_DTL
;
731 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
732 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
734 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
739 if (!env
->vpa_addr
) {
740 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
741 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
742 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
744 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
751 #endif /* TARGET_PPC64 */
753 int kvm_arch_put_registers(CPUState
*cs
, int level
)
755 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
756 CPUPPCState
*env
= &cpu
->env
;
757 struct kvm_regs regs
;
761 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
768 regs
.xer
= cpu_read_xer(env
);
772 regs
.srr0
= env
->spr
[SPR_SRR0
];
773 regs
.srr1
= env
->spr
[SPR_SRR1
];
775 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
776 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
777 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
778 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
779 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
780 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
781 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
782 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
784 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
786 for (i
= 0;i
< 32; i
++)
787 regs
.gpr
[i
] = env
->gpr
[i
];
790 for (i
= 0; i
< 8; i
++) {
791 regs
.cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
794 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
800 if (env
->tlb_dirty
) {
802 env
->tlb_dirty
= false;
805 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
806 struct kvm_sregs sregs
;
808 sregs
.pvr
= env
->spr
[SPR_PVR
];
810 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
814 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
815 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
816 if (env
->slb
[i
].esid
& SLB_ESID_V
) {
817 sregs
.u
.s
.ppc64
.slb
[i
].slbe
|= i
;
819 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
824 for (i
= 0; i
< 16; i
++) {
825 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
829 for (i
= 0; i
< 8; i
++) {
830 /* Beware. We have to swap upper and lower bits here */
831 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
833 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
837 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
843 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
844 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
850 /* We deliberately ignore errors here, for kernels which have
851 * the ONE_REG calls, but don't support the specific
852 * registers, there's a reasonable chance things will still
853 * work, at least until we try to migrate. */
854 for (i
= 0; i
< 1024; i
++) {
855 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
858 kvm_put_one_spr(cs
, id
, i
);
864 if (kvm_put_vpa(cs
) < 0) {
865 DPRINTF("Warning: Unable to set VPA information to KVM\n");
868 #endif /* TARGET_PPC64 */
874 int kvm_arch_get_registers(CPUState
*cs
)
876 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
877 CPUPPCState
*env
= &cpu
->env
;
878 struct kvm_regs regs
;
879 struct kvm_sregs sregs
;
883 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
888 for (i
= 7; i
>= 0; i
--) {
889 env
->crf
[i
] = cr
& 15;
895 cpu_write_xer(env
, regs
.xer
);
899 env
->spr
[SPR_SRR0
] = regs
.srr0
;
900 env
->spr
[SPR_SRR1
] = regs
.srr1
;
902 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
903 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
904 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
905 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
906 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
907 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
908 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
909 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
911 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
913 for (i
= 0;i
< 32; i
++)
914 env
->gpr
[i
] = regs
.gpr
[i
];
918 if (cap_booke_sregs
) {
919 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
924 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
925 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
926 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
927 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
928 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
929 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
930 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
931 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
932 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
933 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
934 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
935 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
938 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
939 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
940 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
941 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
942 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
943 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
946 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
947 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
950 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
951 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
954 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
955 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
956 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
957 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
958 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
959 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
960 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
961 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
962 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
963 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
964 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
965 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
966 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
967 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
968 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
969 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
970 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
972 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
973 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
974 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
975 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
978 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
979 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
982 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
983 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
984 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
988 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
989 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
990 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
991 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
992 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
993 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
994 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
995 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
996 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
997 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
998 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
1001 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
1002 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1005 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1006 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1007 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1010 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1011 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1012 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1013 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1015 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1016 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1017 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1023 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
1028 if (!env
->external_htab
) {
1029 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1035 * The packed SLB array we get from KVM_GET_SREGS only contains
1036 * information about valid entries. So we flush our internal
1037 * copy to get rid of stale ones, then put all valid SLB entries
1040 memset(env
->slb
, 0, sizeof(env
->slb
));
1041 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
1042 target_ulong rb
= sregs
.u
.s
.ppc64
.slb
[i
].slbe
;
1043 target_ulong rs
= sregs
.u
.s
.ppc64
.slb
[i
].slbv
;
1045 * Only restore valid entries
1047 if (rb
& SLB_ESID_V
) {
1048 ppc_store_slb(env
, rb
, rs
);
1054 for (i
= 0; i
< 16; i
++) {
1055 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1059 for (i
= 0; i
< 8; i
++) {
1060 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1061 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1062 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1063 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1068 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1074 /* We deliberately ignore errors here, for kernels which have
1075 * the ONE_REG calls, but don't support the specific
1076 * registers, there's a reasonable chance things will still
1077 * work, at least until we try to migrate. */
1078 for (i
= 0; i
< 1024; i
++) {
1079 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1082 kvm_get_one_spr(cs
, id
, i
);
1088 if (kvm_get_vpa(cs
) < 0) {
1089 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1098 int kvmppc_set_interrupt(PowerPCCPU
*cpu
, int irq
, int level
)
1100 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
1102 if (irq
!= PPC_INTERRUPT_EXT
) {
1106 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
1110 kvm_vcpu_ioctl(CPU(cpu
), KVM_INTERRUPT
, &virq
);
1115 #if defined(TARGET_PPCEMB)
1116 #define PPC_INPUT_INT PPC40x_INPUT_INT
1117 #elif defined(TARGET_PPC64)
1118 #define PPC_INPUT_INT PPC970_INPUT_INT
1120 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1123 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1125 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1126 CPUPPCState
*env
= &cpu
->env
;
1130 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1131 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1132 if (!cap_interrupt_level
&&
1133 run
->ready_for_interrupt_injection
&&
1134 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1135 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1137 /* For now KVM disregards the 'irq' argument. However, in the
1138 * future KVM could cache it in-kernel to avoid a heavyweight exit
1139 * when reading the UIC.
1141 irq
= KVM_INTERRUPT_SET
;
1143 DPRINTF("injected interrupt %d\n", irq
);
1144 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1146 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1149 /* Always wake up soon in case the interrupt was level based */
1150 timer_mod(idle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
1151 (get_ticks_per_sec() / 50));
1154 /* We don't know if there are more interrupts pending after this. However,
1155 * the guest will return to userspace in the course of handling this one
1156 * anyways, so we will get a chance to deliver the rest. */
1159 void kvm_arch_post_run(CPUState
*cpu
, struct kvm_run
*run
)
1163 int kvm_arch_process_async_events(CPUState
*cs
)
1168 static int kvmppc_handle_halt(PowerPCCPU
*cpu
)
1170 CPUState
*cs
= CPU(cpu
);
1171 CPUPPCState
*env
= &cpu
->env
;
1173 if (!(cs
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
1175 cs
->exception_index
= EXCP_HLT
;
1181 /* map dcr access to existing qemu dcr emulation */
1182 static int kvmppc_handle_dcr_read(CPUPPCState
*env
, uint32_t dcrn
, uint32_t *data
)
1184 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
1185 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
1190 static int kvmppc_handle_dcr_write(CPUPPCState
*env
, uint32_t dcrn
, uint32_t data
)
1192 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
1193 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
1198 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1200 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1201 CPUPPCState
*env
= &cpu
->env
;
1204 switch (run
->exit_reason
) {
1206 if (run
->dcr
.is_write
) {
1207 DPRINTF("handle dcr write\n");
1208 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1210 DPRINTF("handle dcr read\n");
1211 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
1215 DPRINTF("handle halt\n");
1216 ret
= kvmppc_handle_halt(cpu
);
1218 #if defined(TARGET_PPC64)
1219 case KVM_EXIT_PAPR_HCALL
:
1220 DPRINTF("handle PAPR hypercall\n");
1221 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1223 run
->papr_hcall
.args
);
1228 DPRINTF("handle epr\n");
1229 run
->epr
.epr
= ldl_phys(cs
->as
, env
->mpic_iack
);
1232 case KVM_EXIT_WATCHDOG
:
1233 DPRINTF("handle watchdog expiry\n");
1234 watchdog_perform_action();
1239 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1247 int kvmppc_or_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1249 CPUState
*cs
= CPU(cpu
);
1250 uint32_t bits
= tsr_bits
;
1251 struct kvm_one_reg reg
= {
1252 .id
= KVM_REG_PPC_OR_TSR
,
1253 .addr
= (uintptr_t) &bits
,
1256 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1259 int kvmppc_clear_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1262 CPUState
*cs
= CPU(cpu
);
1263 uint32_t bits
= tsr_bits
;
1264 struct kvm_one_reg reg
= {
1265 .id
= KVM_REG_PPC_CLEAR_TSR
,
1266 .addr
= (uintptr_t) &bits
,
1269 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1272 int kvmppc_set_tcr(PowerPCCPU
*cpu
)
1274 CPUState
*cs
= CPU(cpu
);
1275 CPUPPCState
*env
= &cpu
->env
;
1276 uint32_t tcr
= env
->spr
[SPR_BOOKE_TCR
];
1278 struct kvm_one_reg reg
= {
1279 .id
= KVM_REG_PPC_TCR
,
1280 .addr
= (uintptr_t) &tcr
,
1283 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1286 int kvmppc_booke_watchdog_enable(PowerPCCPU
*cpu
)
1288 CPUState
*cs
= CPU(cpu
);
1291 if (!kvm_enabled()) {
1295 if (!cap_ppc_watchdog
) {
1296 printf("warning: KVM does not support watchdog");
1300 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_BOOKE_WATCHDOG
, 0);
1302 fprintf(stderr
, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1303 __func__
, strerror(-ret
));
1310 static int read_cpuinfo(const char *field
, char *value
, int len
)
1314 int field_len
= strlen(field
);
1317 f
= fopen("/proc/cpuinfo", "r");
1323 if(!fgets(line
, sizeof(line
), f
)) {
1326 if (!strncmp(line
, field
, field_len
)) {
1327 pstrcpy(value
, len
, line
);
1338 uint32_t kvmppc_get_tbfreq(void)
1342 uint32_t retval
= get_ticks_per_sec();
1344 if (read_cpuinfo("timebase", line
, sizeof(line
))) {
1348 if (!(ns
= strchr(line
, ':'))) {
1358 /* Try to find a device tree node for a CPU with clock-frequency property */
1359 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1361 struct dirent
*dirp
;
1364 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1365 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1370 while ((dirp
= readdir(dp
)) != NULL
) {
1372 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1374 f
= fopen(buf
, "r");
1376 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
1383 if (buf
[0] == '\0') {
1384 printf("Unknown host!\n");
1391 /* Read a CPU node property from the host device tree that's a single
1392 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1393 * (can't find or open the property, or doesn't understand the
1395 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
1405 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
1409 strncat(buf
, "/", sizeof(buf
) - strlen(buf
));
1410 strncat(buf
, propname
, sizeof(buf
) - strlen(buf
));
1412 f
= fopen(buf
, "rb");
1417 len
= fread(&u
, 1, sizeof(u
), f
);
1421 /* property is a 32-bit quantity */
1422 return be32_to_cpu(u
.v32
);
1424 return be64_to_cpu(u
.v64
);
/* Host CPU clock frequency from the device tree, 0/-1 on failure. */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
/* Host VMX/AltiVec capability level from the "ibm,vmx" device tree prop. */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}
/* Host decimal-FP capability from the "ibm,dfp" device tree property. */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
1445 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
1447 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
1448 CPUState
*cs
= CPU(cpu
);
1450 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
1451 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
1458 int kvmppc_get_hasidle(CPUPPCState
*env
)
1460 struct kvm_ppc_pvinfo pvinfo
;
1462 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
1463 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
1470 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
1472 uint32_t *hc
= (uint32_t*)buf
;
1473 struct kvm_ppc_pvinfo pvinfo
;
1475 if (!kvmppc_get_pvinfo(env
, &pvinfo
)) {
1476 memcpy(buf
, pvinfo
.hcall
, buf_len
);
1481 * Fallback to always fail hypercalls:
1497 void kvmppc_set_papr(PowerPCCPU
*cpu
)
1499 CPUState
*cs
= CPU(cpu
);
1502 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_PAPR
, 0);
1504 cpu_abort(cs
, "This KVM version does not support PAPR\n");
1507 /* Update the capability flag so we sync the right information
1512 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
1514 CPUState
*cs
= CPU(cpu
);
1517 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_EPR
, 0, mpic_proxy
);
1518 if (ret
&& mpic_proxy
) {
1519 cpu_abort(cs
, "This KVM version does not support EPR\n");
1523 int kvmppc_smt_threads(void)
1525 return cap_ppc_smt
? cap_ppc_smt
: 1;
1529 off_t
kvmppc_alloc_rma(const char *name
, MemoryRegion
*sysmem
)
1534 struct kvm_allocate_rma ret
;
1535 MemoryRegion
*rma_region
;
1537 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1538 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1539 * not necessary on this hardware
1540 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1542 * FIXME: We should allow the user to force contiguous RMA
1543 * allocation in the cap_ppc_rma==1 case.
1545 if (cap_ppc_rma
< 2) {
1549 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
1551 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1556 size
= MIN(ret
.rma_size
, 256ul << 20);
1558 rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1559 if (rma
== MAP_FAILED
) {
1560 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
1564 rma_region
= g_new(MemoryRegion
, 1);
1565 memory_region_init_ram_ptr(rma_region
, NULL
, name
, size
, rma
);
1566 vmstate_register_ram_global(rma_region
);
1567 memory_region_add_subregion(sysmem
, 0, rma_region
);
1572 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
1574 struct kvm_ppc_smmu_info info
;
1575 long rampagesize
, best_page_shift
;
1578 if (cap_ppc_rma
>= 2) {
1579 return current_size
;
1582 /* Find the largest hardware supported page size that's less than
1583 * or equal to the (logical) backing page size of guest RAM */
1584 kvm_get_smmu_info(POWERPC_CPU(first_cpu
), &info
);
1585 rampagesize
= getrampagesize();
1586 best_page_shift
= 0;
1588 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
1589 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
1591 if (!sps
->page_shift
) {
1595 if ((sps
->page_shift
> best_page_shift
)
1596 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
1597 best_page_shift
= sps
->page_shift
;
1601 return MIN(current_size
,
1602 1ULL << (best_page_shift
+ hash_shift
- 7));
1606 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
)
1608 struct kvm_create_spapr_tce args
= {
1610 .window_size
= window_size
,
1616 /* Must set fd to -1 so we don't try to munmap when called for
1617 * destroying the table, which the upper layers -will- do
1620 if (!cap_spapr_tce
) {
1624 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
1626 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
1631 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(uint64_t);
1632 /* FIXME: round this up to page size */
1634 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1635 if (table
== MAP_FAILED
) {
1636 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
1646 int kvmppc_remove_spapr_tce(void *table
, int fd
, uint32_t window_size
)
1654 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
)*sizeof(uint64_t);
1655 if ((munmap(table
, len
) < 0) ||
1657 fprintf(stderr
, "KVM: Unexpected error removing TCE table: %s",
1659 /* Leak the table */
1665 int kvmppc_reset_htab(int shift_hint
)
1667 uint32_t shift
= shift_hint
;
1669 if (!kvm_enabled()) {
1670 /* Full emulation, tell caller to allocate htab itself */
1673 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
1675 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
1676 if (ret
== -ENOTTY
) {
1677 /* At least some versions of PR KVM advertise the
1678 * capability, but don't implement the ioctl(). Oops.
1679 * Return 0 so that we allocate the htab in qemu, as is
1680 * correct for PR. */
1682 } else if (ret
< 0) {
1688 /* We have a kernel that predates the htab reset calls. For PR
1689 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1690 * this era, it has allocated a 16MB fixed size hash table
1691 * already. Kernels of this era have the GET_PVINFO capability
1692 * only on PR, so we use this hack to determine the right
1694 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
1695 /* PR - tell caller to allocate htab */
1698 /* HV - assume 16MB kernel allocated htab */
1703 static inline uint32_t mfpvr(void)
/* Set (on == true) or clear (on == false) the given flag bits in *word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}
1721 static void kvmppc_host_cpu_initfn(Object
*obj
)
1723 assert(kvm_enabled());
1726 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
1728 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
1729 uint32_t vmx
= kvmppc_get_vmx();
1730 uint32_t dfp
= kvmppc_get_dfp();
1731 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
1732 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
1734 /* Now fix up the class with information we can query from the host */
1738 /* Only override when we know what the host supports */
1739 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
1740 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
1743 /* Only override when we know what the host supports */
1744 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
1747 if (dcache_size
!= -1) {
1748 pcc
->l1_dcache_size
= dcache_size
;
1751 if (icache_size
!= -1) {
1752 pcc
->l1_icache_size
= icache_size
;
1756 bool kvmppc_has_cap_epr(void)
1761 bool kvmppc_has_cap_htab_fd(void)
1766 static PowerPCCPUClass
*ppc_cpu_get_family_class(PowerPCCPUClass
*pcc
)
1768 ObjectClass
*oc
= OBJECT_CLASS(pcc
);
1770 while (oc
&& !object_class_is_abstract(oc
)) {
1771 oc
= object_class_get_parent(oc
);
1775 return POWERPC_CPU_CLASS(oc
);
1778 static int kvm_ppc_register_host_cpu_type(void)
1780 TypeInfo type_info
= {
1781 .name
= TYPE_HOST_POWERPC_CPU
,
1782 .instance_init
= kvmppc_host_cpu_initfn
,
1783 .class_init
= kvmppc_host_cpu_class_init
,
1785 uint32_t host_pvr
= mfpvr();
1786 PowerPCCPUClass
*pvr_pcc
;
1789 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
1790 if (pvr_pcc
== NULL
) {
1791 pvr_pcc
= ppc_cpu_class_by_pvr_mask(host_pvr
);
1793 if (pvr_pcc
== NULL
) {
1796 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
1797 type_register(&type_info
);
1799 /* Register generic family CPU class for a family */
1800 pvr_pcc
= ppc_cpu_get_family_class(pvr_pcc
);
1801 dc
= DEVICE_CLASS(pvr_pcc
);
1802 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
1803 type_info
.name
= g_strdup_printf("%s-"TYPE_POWERPC_CPU
, dc
->desc
);
1804 type_register(&type_info
);
1809 int kvmppc_define_rtas_kernel_token(uint32_t token
, const char *function
)
1811 struct kvm_rtas_token_args args
= {
1815 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_RTAS
)) {
1819 strncpy(args
.name
, function
, sizeof(args
.name
));
1821 return kvm_vm_ioctl(kvm_state
, KVM_PPC_RTAS_DEFINE_TOKEN
, &args
);
1824 int kvmppc_get_htab_fd(bool write
)
1826 struct kvm_get_htab_fd s
= {
1827 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
1832 fprintf(stderr
, "KVM version doesn't support saving the hash table\n");
1836 return kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
1839 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
1841 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
1842 uint8_t buf
[bufsize
];
1846 rc
= read(fd
, buf
, bufsize
);
1848 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
1852 /* Kernel already retuns data in BE format for the file */
1853 qemu_put_buffer(f
, buf
, rc
);
1857 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
1859 return (rc
== 0) ? 1 : 0;
1862 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
1863 uint16_t n_valid
, uint16_t n_invalid
)
1865 struct kvm_get_htab_header
*buf
;
1866 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
1869 buf
= alloca(chunksize
);
1870 /* This is KVM on ppc, so this is all big-endian */
1872 buf
->n_valid
= n_valid
;
1873 buf
->n_invalid
= n_invalid
;
1875 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
1877 rc
= write(fd
, buf
, chunksize
);
1879 fprintf(stderr
, "Error writing KVM hash table: %s\n",
1883 if (rc
!= chunksize
) {
1884 /* We should never get a short write on a single chunk */
1885 fprintf(stderr
, "Short write, restoring KVM hash table\n");
1891 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
1896 int kvm_arch_on_sigbus_vcpu(CPUState
*cpu
, int code
, void *addr
)
/* SIGBUS outside vcpu context: not handled on ppc (returns non-zero). */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}
1906 void kvm_arch_init_irq_routing(KVMState
*s
)
1910 int kvm_arch_insert_sw_breakpoint(CPUState
*cpu
, struct kvm_sw_breakpoint
*bp
)
1915 int kvm_arch_remove_sw_breakpoint(CPUState
*cpu
, struct kvm_sw_breakpoint
*bp
)
1920 int kvm_arch_insert_hw_breakpoint(target_ulong addr
, target_ulong len
, int type
)
1925 int kvm_arch_remove_hw_breakpoint(target_ulong addr
, target_ulong len
, int type
)
1930 void kvm_arch_remove_all_hw_breakpoints(void)
1934 void kvm_arch_update_guest_debug(CPUState
*cpu
, struct kvm_guest_debug
*dbg
)
1938 struct kvm_get_htab_buf
{
1939 struct kvm_get_htab_header header
;
1941 * We require one extra byte for read
1943 target_ulong hpte
[(HPTES_PER_GROUP
* 2) + 1];
1946 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU
*cpu
, target_ulong pte_index
)
1949 struct kvm_get_htab_fd ghf
;
1950 struct kvm_get_htab_buf
*hpte_buf
;
1953 ghf
.start_index
= pte_index
;
1954 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
1959 hpte_buf
= g_malloc0(sizeof(*hpte_buf
));
1961 * Read the hpte group
1963 if (read(htab_fd
, hpte_buf
, sizeof(*hpte_buf
)) < 0) {
1968 return (uint64_t)(uintptr_t) hpte_buf
->hpte
;
1977 void kvmppc_hash64_free_pteg(uint64_t token
)
1979 struct kvm_get_htab_buf
*htab_buf
;
1981 htab_buf
= container_of((void *)(uintptr_t) token
, struct kvm_get_htab_buf
,
1987 void kvmppc_hash64_write_pte(CPUPPCState
*env
, target_ulong pte_index
,
1988 target_ulong pte0
, target_ulong pte1
)
1991 struct kvm_get_htab_fd ghf
;
1992 struct kvm_get_htab_buf hpte_buf
;
1995 ghf
.start_index
= 0; /* Ignored */
1996 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2001 hpte_buf
.header
.n_valid
= 1;
2002 hpte_buf
.header
.n_invalid
= 0;
2003 hpte_buf
.header
.index
= pte_index
;
2004 hpte_buf
.hpte
[0] = pte0
;
2005 hpte_buf
.hpte
[1] = pte1
;
2007 * Write the hpte entry.
2008 * CAUTION: write() has the warn_unused_result attribute. Hence we
2009 * need to check the return value, even though we do nothing.
2011 if (write(htab_fd
, &hpte_buf
, sizeof(hpte_buf
)) < 0) {