2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "hw/ppc/ppc.h"
39 #include "sysemu/watchdog.h"
45 #define DPRINTF(fmt, ...) \
46 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
48 #define DPRINTF(fmt, ...) \
52 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
54 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
58 static int cap_interrupt_unset
= false;
59 static int cap_interrupt_level
= false;
60 static int cap_segstate
;
61 static int cap_booke_sregs
;
62 static int cap_ppc_smt
;
63 static int cap_ppc_rma
;
64 static int cap_spapr_tce
;
66 static int cap_one_reg
;
68 static int cap_ppc_watchdog
;
70 static int cap_htab_fd
;
72 /* XXX We have a race condition where we actually have a level triggered
73 * interrupt, but the infrastructure can't expose that yet, so the guest
74 * takes but ignores it, goes to sleep and never gets notified that there's
75 * still an interrupt pending.
77 * As a quick workaround, let's just wake up again 20 ms after we injected
78 * an interrupt. That way we can assure that we're always reinjecting
79 * interrupts in case the guest swallowed them.
81 static QEMUTimer
*idle_timer
;
83 static void kvm_kick_cpu(void *opaque
)
85 PowerPCCPU
*cpu
= opaque
;
87 qemu_cpu_kick(CPU(cpu
));
90 static int kvm_ppc_register_host_cpu_type(void);
92 int kvm_arch_init(KVMState
*s
)
94 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
95 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
96 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
97 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
98 cap_ppc_smt
= kvm_check_extension(s
, KVM_CAP_PPC_SMT
);
99 cap_ppc_rma
= kvm_check_extension(s
, KVM_CAP_PPC_RMA
);
100 cap_spapr_tce
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE
);
101 cap_one_reg
= kvm_check_extension(s
, KVM_CAP_ONE_REG
);
102 cap_hior
= kvm_check_extension(s
, KVM_CAP_PPC_HIOR
);
103 cap_epr
= kvm_check_extension(s
, KVM_CAP_PPC_EPR
);
104 cap_ppc_watchdog
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_WATCHDOG
);
105 /* Note: we don't set cap_papr here, because this capability is
106 * only activated after this by kvmppc_set_papr() */
107 cap_htab_fd
= kvm_check_extension(s
, KVM_CAP_PPC_HTAB_FD
);
109 if (!cap_interrupt_level
) {
110 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
111 "VM to stall at times!\n");
114 kvm_ppc_register_host_cpu_type();
119 static int kvm_arch_sync_sregs(PowerPCCPU
*cpu
)
121 CPUPPCState
*cenv
= &cpu
->env
;
122 CPUState
*cs
= CPU(cpu
);
123 struct kvm_sregs sregs
;
126 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
127 /* What we're really trying to say is "if we're on BookE, we use
128 the native PVR for now". This is the only sane way to check
129 it though, so we potentially confuse users that they can run
130 BookE guests on BookS. Let's hope nobody dares enough :) */
134 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
139 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
144 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
145 return kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
148 /* Set up a shared TLB array with KVM */
149 static int kvm_booke206_tlb_init(PowerPCCPU
*cpu
)
151 CPUPPCState
*env
= &cpu
->env
;
152 CPUState
*cs
= CPU(cpu
);
153 struct kvm_book3e_206_tlb_params params
= {};
154 struct kvm_config_tlb cfg
= {};
155 unsigned int entries
= 0;
158 if (!kvm_enabled() ||
159 !kvm_check_extension(cs
->kvm_state
, KVM_CAP_SW_TLB
)) {
163 assert(ARRAY_SIZE(params
.tlb_sizes
) == BOOKE206_MAX_TLBN
);
165 for (i
= 0; i
< BOOKE206_MAX_TLBN
; i
++) {
166 params
.tlb_sizes
[i
] = booke206_tlb_size(env
, i
);
167 params
.tlb_ways
[i
] = booke206_tlb_ways(env
, i
);
168 entries
+= params
.tlb_sizes
[i
];
171 assert(entries
== env
->nb_tlb
);
172 assert(sizeof(struct kvm_book3e_206_tlb_entry
) == sizeof(ppcmas_tlb_t
));
174 env
->tlb_dirty
= true;
176 cfg
.array
= (uintptr_t)env
->tlb
.tlbm
;
177 cfg
.array_len
= sizeof(ppcmas_tlb_t
) * entries
;
178 cfg
.params
= (uintptr_t)¶ms
;
179 cfg
.mmu_type
= KVM_MMU_FSL_BOOKE_NOHV
;
181 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_SW_TLB
, 0, (uintptr_t)&cfg
);
183 fprintf(stderr
, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
184 __func__
, strerror(-ret
));
188 env
->kvm_sw_tlb
= true;
193 #if defined(TARGET_PPC64)
194 static void kvm_get_fallback_smmu_info(PowerPCCPU
*cpu
,
195 struct kvm_ppc_smmu_info
*info
)
197 CPUPPCState
*env
= &cpu
->env
;
198 CPUState
*cs
= CPU(cpu
);
200 memset(info
, 0, sizeof(*info
));
202 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
203 * need to "guess" what the supported page sizes are.
205 * For that to work we make a few assumptions:
207 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
208 * KVM which only supports 4K and 16M pages, but supports them
209 * regardless of the backing store characteristics. We also don't
210 * support 1T segments.
212 * This is safe as if HV KVM ever supports that capability or PR
213 * KVM grows support for more page/segment sizes, those versions
214 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
215 * will not hit this fallback
217 * - Else we are running HV KVM. This means we only support page
218 * sizes that fit in the backing store. Additionally we only
219 * advertise 64K pages if the processor is ARCH 2.06 and we assume
220 * P7 encodings for the SLB and hash table. Here too, we assume
221 * support for any newer processor will mean a kernel that
222 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
225 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
230 /* Standard 4k base page size segment */
231 info
->sps
[0].page_shift
= 12;
232 info
->sps
[0].slb_enc
= 0;
233 info
->sps
[0].enc
[0].page_shift
= 12;
234 info
->sps
[0].enc
[0].pte_enc
= 0;
236 /* Standard 16M large page size segment */
237 info
->sps
[1].page_shift
= 24;
238 info
->sps
[1].slb_enc
= SLB_VSID_L
;
239 info
->sps
[1].enc
[0].page_shift
= 24;
240 info
->sps
[1].enc
[0].pte_enc
= 0;
244 /* HV KVM has backing store size restrictions */
245 info
->flags
= KVM_PPC_PAGE_SIZES_REAL
;
247 if (env
->mmu_model
& POWERPC_MMU_1TSEG
) {
248 info
->flags
|= KVM_PPC_1T_SEGMENTS
;
251 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
257 /* Standard 4k base page size segment */
258 info
->sps
[i
].page_shift
= 12;
259 info
->sps
[i
].slb_enc
= 0;
260 info
->sps
[i
].enc
[0].page_shift
= 12;
261 info
->sps
[i
].enc
[0].pte_enc
= 0;
264 /* 64K on MMU 2.06 */
265 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
266 info
->sps
[i
].page_shift
= 16;
267 info
->sps
[i
].slb_enc
= 0x110;
268 info
->sps
[i
].enc
[0].page_shift
= 16;
269 info
->sps
[i
].enc
[0].pte_enc
= 1;
273 /* Standard 16M large page size segment */
274 info
->sps
[i
].page_shift
= 24;
275 info
->sps
[i
].slb_enc
= SLB_VSID_L
;
276 info
->sps
[i
].enc
[0].page_shift
= 24;
277 info
->sps
[i
].enc
[0].pte_enc
= 0;
281 static void kvm_get_smmu_info(PowerPCCPU
*cpu
, struct kvm_ppc_smmu_info
*info
)
283 CPUState
*cs
= CPU(cpu
);
286 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
287 ret
= kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
293 kvm_get_fallback_smmu_info(cpu
, info
);
296 static long getrampagesize(void)
302 /* guest RAM is backed by normal anonymous pages */
303 return getpagesize();
307 ret
= statfs(mem_path
, &fs
);
308 } while (ret
!= 0 && errno
== EINTR
);
311 fprintf(stderr
, "Couldn't statfs() memory path: %s\n",
316 #define HUGETLBFS_MAGIC 0x958458f6
318 if (fs
.f_type
!= HUGETLBFS_MAGIC
) {
319 /* Explicit mempath, but it's ordinary pages */
320 return getpagesize();
323 /* It's hugepage, return the huge page size */
327 static bool kvm_valid_page_size(uint32_t flags
, long rampgsize
, uint32_t shift
)
329 if (!(flags
& KVM_PPC_PAGE_SIZES_REAL
)) {
333 return (1ul << shift
) <= rampgsize
;
336 static void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
338 static struct kvm_ppc_smmu_info smmu_info
;
339 static bool has_smmu_info
;
340 CPUPPCState
*env
= &cpu
->env
;
344 /* We only handle page sizes for 64-bit server guests for now */
345 if (!(env
->mmu_model
& POWERPC_MMU_64
)) {
349 /* Collect MMU info from kernel if not already */
350 if (!has_smmu_info
) {
351 kvm_get_smmu_info(cpu
, &smmu_info
);
352 has_smmu_info
= true;
355 rampagesize
= getrampagesize();
357 /* Convert to QEMU form */
358 memset(&env
->sps
, 0, sizeof(env
->sps
));
361 * XXX This loop should be an entry wide AND of the capabilities that
362 * the selected CPU has with the capabilities that KVM supports.
364 for (ik
= iq
= 0; ik
< KVM_PPC_PAGE_SIZES_MAX_SZ
; ik
++) {
365 struct ppc_one_seg_page_size
*qsps
= &env
->sps
.sps
[iq
];
366 struct kvm_ppc_one_seg_page_size
*ksps
= &smmu_info
.sps
[ik
];
368 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
372 qsps
->page_shift
= ksps
->page_shift
;
373 qsps
->slb_enc
= ksps
->slb_enc
;
374 for (jk
= jq
= 0; jk
< KVM_PPC_PAGE_SIZES_MAX_SZ
; jk
++) {
375 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
376 ksps
->enc
[jk
].page_shift
)) {
379 qsps
->enc
[jq
].page_shift
= ksps
->enc
[jk
].page_shift
;
380 qsps
->enc
[jq
].pte_enc
= ksps
->enc
[jk
].pte_enc
;
381 if (++jq
>= PPC_PAGE_SIZES_MAX_SZ
) {
385 if (++iq
>= PPC_PAGE_SIZES_MAX_SZ
) {
389 env
->slb_nr
= smmu_info
.slb_size
;
390 if (!(smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
)) {
391 env
->mmu_model
&= ~POWERPC_MMU_1TSEG
;
394 #else /* defined (TARGET_PPC64) */
396 static inline void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
400 #endif /* !defined (TARGET_PPC64) */
402 unsigned long kvm_arch_vcpu_id(CPUState
*cpu
)
404 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu
));
407 int kvm_arch_init_vcpu(CPUState
*cs
)
409 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
410 CPUPPCState
*cenv
= &cpu
->env
;
413 /* Gather server mmu info from KVM and update the CPU state */
414 kvm_fixup_page_sizes(cpu
);
416 /* Synchronize sregs with kvm */
417 ret
= kvm_arch_sync_sregs(cpu
);
422 idle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
, kvm_kick_cpu
, cpu
);
424 /* Some targets support access to KVM's guest TLB. */
425 switch (cenv
->mmu_model
) {
426 case POWERPC_MMU_BOOKE206
:
427 ret
= kvm_booke206_tlb_init(cpu
);
436 static void kvm_sw_tlb_put(PowerPCCPU
*cpu
)
438 CPUPPCState
*env
= &cpu
->env
;
439 CPUState
*cs
= CPU(cpu
);
440 struct kvm_dirty_tlb dirty_tlb
;
441 unsigned char *bitmap
;
444 if (!env
->kvm_sw_tlb
) {
448 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
449 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
451 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
452 dirty_tlb
.num_dirty
= env
->nb_tlb
;
454 ret
= kvm_vcpu_ioctl(cs
, KVM_DIRTY_TLB
, &dirty_tlb
);
456 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
457 __func__
, strerror(-ret
));
463 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
465 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
466 CPUPPCState
*env
= &cpu
->env
;
471 struct kvm_one_reg reg
= {
473 .addr
= (uintptr_t) &val
,
477 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
479 trace_kvm_failed_spr_get(spr
, strerror(errno
));
481 switch (id
& KVM_REG_SIZE_MASK
) {
482 case KVM_REG_SIZE_U32
:
483 env
->spr
[spr
] = val
.u32
;
486 case KVM_REG_SIZE_U64
:
487 env
->spr
[spr
] = val
.u64
;
491 /* Don't handle this size yet */
497 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
499 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
500 CPUPPCState
*env
= &cpu
->env
;
505 struct kvm_one_reg reg
= {
507 .addr
= (uintptr_t) &val
,
511 switch (id
& KVM_REG_SIZE_MASK
) {
512 case KVM_REG_SIZE_U32
:
513 val
.u32
= env
->spr
[spr
];
516 case KVM_REG_SIZE_U64
:
517 val
.u64
= env
->spr
[spr
];
521 /* Don't handle this size yet */
525 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
527 trace_kvm_failed_spr_set(spr
, strerror(errno
));
531 static int kvm_put_fp(CPUState
*cs
)
533 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
534 CPUPPCState
*env
= &cpu
->env
;
535 struct kvm_one_reg reg
;
539 if (env
->insns_flags
& PPC_FLOAT
) {
540 uint64_t fpscr
= env
->fpscr
;
541 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
543 reg
.id
= KVM_REG_PPC_FPSCR
;
544 reg
.addr
= (uintptr_t)&fpscr
;
545 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
547 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
551 for (i
= 0; i
< 32; i
++) {
554 vsr
[0] = float64_val(env
->fpr
[i
]);
555 vsr
[1] = env
->vsr
[i
];
556 reg
.addr
= (uintptr_t) &vsr
;
557 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
559 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
561 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
568 if (env
->insns_flags
& PPC_ALTIVEC
) {
569 reg
.id
= KVM_REG_PPC_VSCR
;
570 reg
.addr
= (uintptr_t)&env
->vscr
;
571 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
573 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno
));
577 for (i
= 0; i
< 32; i
++) {
578 reg
.id
= KVM_REG_PPC_VR(i
);
579 reg
.addr
= (uintptr_t)&env
->avr
[i
];
580 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
582 DPRINTF("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
591 static int kvm_get_fp(CPUState
*cs
)
593 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
594 CPUPPCState
*env
= &cpu
->env
;
595 struct kvm_one_reg reg
;
599 if (env
->insns_flags
& PPC_FLOAT
) {
601 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
603 reg
.id
= KVM_REG_PPC_FPSCR
;
604 reg
.addr
= (uintptr_t)&fpscr
;
605 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
607 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
613 for (i
= 0; i
< 32; i
++) {
616 reg
.addr
= (uintptr_t) &vsr
;
617 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
619 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
621 DPRINTF("Unable to get %s%d from KVM: %s\n",
622 vsx
? "VSR" : "FPR", i
, strerror(errno
));
625 env
->fpr
[i
] = vsr
[0];
627 env
->vsr
[i
] = vsr
[1];
633 if (env
->insns_flags
& PPC_ALTIVEC
) {
634 reg
.id
= KVM_REG_PPC_VSCR
;
635 reg
.addr
= (uintptr_t)&env
->vscr
;
636 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
638 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno
));
642 for (i
= 0; i
< 32; i
++) {
643 reg
.id
= KVM_REG_PPC_VR(i
);
644 reg
.addr
= (uintptr_t)&env
->avr
[i
];
645 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
647 DPRINTF("Unable to get VR%d from KVM: %s\n",
657 #if defined(TARGET_PPC64)
658 static int kvm_get_vpa(CPUState
*cs
)
660 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
661 CPUPPCState
*env
= &cpu
->env
;
662 struct kvm_one_reg reg
;
665 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
666 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
667 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
669 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno
));
673 assert((uintptr_t)&env
->slb_shadow_size
674 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
675 reg
.id
= KVM_REG_PPC_VPA_SLB
;
676 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
677 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
679 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
684 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
685 reg
.id
= KVM_REG_PPC_VPA_DTL
;
686 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
687 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
689 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
697 static int kvm_put_vpa(CPUState
*cs
)
699 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
700 CPUPPCState
*env
= &cpu
->env
;
701 struct kvm_one_reg reg
;
704 /* SLB shadow or DTL can't be registered unless a master VPA is
705 * registered. That means when restoring state, if a VPA *is*
706 * registered, we need to set that up first. If not, we need to
707 * deregister the others before deregistering the master VPA */
708 assert(env
->vpa_addr
|| !(env
->slb_shadow_addr
|| env
->dtl_addr
));
711 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
712 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
713 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
715 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
720 assert((uintptr_t)&env
->slb_shadow_size
721 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
722 reg
.id
= KVM_REG_PPC_VPA_SLB
;
723 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
724 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
726 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
730 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
731 reg
.id
= KVM_REG_PPC_VPA_DTL
;
732 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
733 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
735 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
740 if (!env
->vpa_addr
) {
741 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
742 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
743 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
745 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
752 #endif /* TARGET_PPC64 */
754 int kvm_arch_put_registers(CPUState
*cs
, int level
)
756 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
757 CPUPPCState
*env
= &cpu
->env
;
758 struct kvm_regs regs
;
762 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
769 regs
.xer
= cpu_read_xer(env
);
773 regs
.srr0
= env
->spr
[SPR_SRR0
];
774 regs
.srr1
= env
->spr
[SPR_SRR1
];
776 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
777 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
778 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
779 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
780 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
781 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
782 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
783 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
785 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
787 for (i
= 0;i
< 32; i
++)
788 regs
.gpr
[i
] = env
->gpr
[i
];
791 for (i
= 0; i
< 8; i
++) {
792 regs
.cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
795 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
801 if (env
->tlb_dirty
) {
803 env
->tlb_dirty
= false;
806 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
807 struct kvm_sregs sregs
;
809 sregs
.pvr
= env
->spr
[SPR_PVR
];
811 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
815 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
816 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
817 if (env
->slb
[i
].esid
& SLB_ESID_V
) {
818 sregs
.u
.s
.ppc64
.slb
[i
].slbe
|= i
;
820 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
825 for (i
= 0; i
< 16; i
++) {
826 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
830 for (i
= 0; i
< 8; i
++) {
831 /* Beware. We have to swap upper and lower bits here */
832 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
834 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
838 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
844 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
845 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
851 /* We deliberately ignore errors here, for kernels which have
852 * the ONE_REG calls, but don't support the specific
853 * registers, there's a reasonable chance things will still
854 * work, at least until we try to migrate. */
855 for (i
= 0; i
< 1024; i
++) {
856 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
859 kvm_put_one_spr(cs
, id
, i
);
865 if (kvm_put_vpa(cs
) < 0) {
866 DPRINTF("Warning: Unable to set VPA information to KVM\n");
870 kvm_set_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
871 #endif /* TARGET_PPC64 */
877 int kvm_arch_get_registers(CPUState
*cs
)
879 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
880 CPUPPCState
*env
= &cpu
->env
;
881 struct kvm_regs regs
;
882 struct kvm_sregs sregs
;
886 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
891 for (i
= 7; i
>= 0; i
--) {
892 env
->crf
[i
] = cr
& 15;
898 cpu_write_xer(env
, regs
.xer
);
902 env
->spr
[SPR_SRR0
] = regs
.srr0
;
903 env
->spr
[SPR_SRR1
] = regs
.srr1
;
905 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
906 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
907 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
908 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
909 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
910 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
911 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
912 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
914 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
916 for (i
= 0;i
< 32; i
++)
917 env
->gpr
[i
] = regs
.gpr
[i
];
921 if (cap_booke_sregs
) {
922 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
927 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
928 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
929 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
930 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
931 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
932 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
933 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
934 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
935 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
936 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
937 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
938 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
941 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
942 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
943 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
944 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
945 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
946 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
949 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
950 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
953 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
954 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
957 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
958 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
959 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
960 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
961 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
962 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
963 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
964 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
965 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
966 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
967 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
968 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
969 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
970 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
971 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
972 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
973 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
975 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
976 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
977 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
978 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
981 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
982 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
985 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
986 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
987 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
991 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
992 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
993 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
994 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
995 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
996 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
997 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
998 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
999 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
1000 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
1001 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
1004 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
1005 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1008 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1009 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1010 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1013 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1014 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1015 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1016 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1018 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1019 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1020 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1026 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
1031 if (!env
->external_htab
) {
1032 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1038 * The packed SLB array we get from KVM_GET_SREGS only contains
1039 * information about valid entries. So we flush our internal
1040 * copy to get rid of stale ones, then put all valid SLB entries
1043 memset(env
->slb
, 0, sizeof(env
->slb
));
1044 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
1045 target_ulong rb
= sregs
.u
.s
.ppc64
.slb
[i
].slbe
;
1046 target_ulong rs
= sregs
.u
.s
.ppc64
.slb
[i
].slbv
;
1048 * Only restore valid entries
1050 if (rb
& SLB_ESID_V
) {
1051 ppc_store_slb(env
, rb
, rs
);
1057 for (i
= 0; i
< 16; i
++) {
1058 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1062 for (i
= 0; i
< 8; i
++) {
1063 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1064 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1065 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1066 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1071 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1077 /* We deliberately ignore errors here, for kernels which have
1078 * the ONE_REG calls, but don't support the specific
1079 * registers, there's a reasonable chance things will still
1080 * work, at least until we try to migrate. */
1081 for (i
= 0; i
< 1024; i
++) {
1082 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1085 kvm_get_one_spr(cs
, id
, i
);
1091 if (kvm_get_vpa(cs
) < 0) {
1092 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1096 kvm_get_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1103 int kvmppc_set_interrupt(PowerPCCPU
*cpu
, int irq
, int level
)
1105 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
1107 if (irq
!= PPC_INTERRUPT_EXT
) {
1111 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
1115 kvm_vcpu_ioctl(CPU(cpu
), KVM_INTERRUPT
, &virq
);
1120 #if defined(TARGET_PPCEMB)
1121 #define PPC_INPUT_INT PPC40x_INPUT_INT
1122 #elif defined(TARGET_PPC64)
1123 #define PPC_INPUT_INT PPC970_INPUT_INT
1125 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1128 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1130 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1131 CPUPPCState
*env
= &cpu
->env
;
1135 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1136 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1137 if (!cap_interrupt_level
&&
1138 run
->ready_for_interrupt_injection
&&
1139 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1140 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1142 /* For now KVM disregards the 'irq' argument. However, in the
1143 * future KVM could cache it in-kernel to avoid a heavyweight exit
1144 * when reading the UIC.
1146 irq
= KVM_INTERRUPT_SET
;
1148 DPRINTF("injected interrupt %d\n", irq
);
1149 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1151 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1154 /* Always wake up soon in case the interrupt was level based */
1155 timer_mod(idle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
1156 (get_ticks_per_sec() / 50));
1159 /* We don't know if there are more interrupts pending after this. However,
1160 * the guest will return to userspace in the course of handling this one
1161 * anyways, so we will get a chance to deliver the rest. */
1164 void kvm_arch_post_run(CPUState
*cpu
, struct kvm_run
*run
)
1168 int kvm_arch_process_async_events(CPUState
*cs
)
1173 static int kvmppc_handle_halt(PowerPCCPU
*cpu
)
1175 CPUState
*cs
= CPU(cpu
);
1176 CPUPPCState
*env
= &cpu
->env
;
1178 if (!(cs
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
1180 cs
->exception_index
= EXCP_HLT
;
1186 /* map dcr access to existing qemu dcr emulation */
1187 static int kvmppc_handle_dcr_read(CPUPPCState
*env
, uint32_t dcrn
, uint32_t *data
)
1189 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
1190 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
1195 static int kvmppc_handle_dcr_write(CPUPPCState
*env
, uint32_t dcrn
, uint32_t data
)
1197 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
1198 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
1203 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1205 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1206 CPUPPCState
*env
= &cpu
->env
;
1209 switch (run
->exit_reason
) {
1211 if (run
->dcr
.is_write
) {
1212 DPRINTF("handle dcr write\n");
1213 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1215 DPRINTF("handle dcr read\n");
1216 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
1220 DPRINTF("handle halt\n");
1221 ret
= kvmppc_handle_halt(cpu
);
1223 #if defined(TARGET_PPC64)
1224 case KVM_EXIT_PAPR_HCALL
:
1225 DPRINTF("handle PAPR hypercall\n");
1226 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1228 run
->papr_hcall
.args
);
1233 DPRINTF("handle epr\n");
1234 run
->epr
.epr
= ldl_phys(cs
->as
, env
->mpic_iack
);
1237 case KVM_EXIT_WATCHDOG
:
1238 DPRINTF("handle watchdog expiry\n");
1239 watchdog_perform_action();
1244 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1252 int kvmppc_or_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1254 CPUState
*cs
= CPU(cpu
);
1255 uint32_t bits
= tsr_bits
;
1256 struct kvm_one_reg reg
= {
1257 .id
= KVM_REG_PPC_OR_TSR
,
1258 .addr
= (uintptr_t) &bits
,
1261 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1264 int kvmppc_clear_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1267 CPUState
*cs
= CPU(cpu
);
1268 uint32_t bits
= tsr_bits
;
1269 struct kvm_one_reg reg
= {
1270 .id
= KVM_REG_PPC_CLEAR_TSR
,
1271 .addr
= (uintptr_t) &bits
,
1274 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1277 int kvmppc_set_tcr(PowerPCCPU
*cpu
)
1279 CPUState
*cs
= CPU(cpu
);
1280 CPUPPCState
*env
= &cpu
->env
;
1281 uint32_t tcr
= env
->spr
[SPR_BOOKE_TCR
];
1283 struct kvm_one_reg reg
= {
1284 .id
= KVM_REG_PPC_TCR
,
1285 .addr
= (uintptr_t) &tcr
,
1288 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1291 int kvmppc_booke_watchdog_enable(PowerPCCPU
*cpu
)
1293 CPUState
*cs
= CPU(cpu
);
1296 if (!kvm_enabled()) {
1300 if (!cap_ppc_watchdog
) {
1301 printf("warning: KVM does not support watchdog");
1305 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_BOOKE_WATCHDOG
, 0);
1307 fprintf(stderr
, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1308 __func__
, strerror(-ret
));
1315 static int read_cpuinfo(const char *field
, char *value
, int len
)
1319 int field_len
= strlen(field
);
1322 f
= fopen("/proc/cpuinfo", "r");
1328 if(!fgets(line
, sizeof(line
), f
)) {
1331 if (!strncmp(line
, field
, field_len
)) {
1332 pstrcpy(value
, len
, line
);
1343 uint32_t kvmppc_get_tbfreq(void)
1347 uint32_t retval
= get_ticks_per_sec();
1349 if (read_cpuinfo("timebase", line
, sizeof(line
))) {
1353 if (!(ns
= strchr(line
, ':'))) {
1363 /* Try to find a device tree node for a CPU with clock-frequency property */
1364 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1366 struct dirent
*dirp
;
1369 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1370 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1375 while ((dirp
= readdir(dp
)) != NULL
) {
1377 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1379 f
= fopen(buf
, "r");
1381 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
1388 if (buf
[0] == '\0') {
1389 printf("Unknown host!\n");
1396 /* Read a CPU node property from the host device tree that's a single
1397 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1398 * (can't find or open the property, or doesn't understand the
1400 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
1410 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
1414 strncat(buf
, "/", sizeof(buf
) - strlen(buf
));
1415 strncat(buf
, propname
, sizeof(buf
) - strlen(buf
));
1417 f
= fopen(buf
, "rb");
1422 len
= fread(&u
, 1, sizeof(u
), f
);
1426 /* property is a 32-bit quantity */
1427 return be32_to_cpu(u
.v32
);
1429 return be64_to_cpu(u
.v64
);
/* Host CPU clock frequency from the device tree (0 on failure). */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
/* Host "ibm,vmx" (AltiVec/VSX level) property from the device tree. */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}
/* Host "ibm,dfp" (decimal floating point) property from the device tree. */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
1450 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
1452 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
1453 CPUState
*cs
= CPU(cpu
);
1455 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
1456 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
1463 int kvmppc_get_hasidle(CPUPPCState
*env
)
1465 struct kvm_ppc_pvinfo pvinfo
;
1467 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
1468 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
1475 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
1477 uint32_t *hc
= (uint32_t*)buf
;
1478 struct kvm_ppc_pvinfo pvinfo
;
1480 if (!kvmppc_get_pvinfo(env
, &pvinfo
)) {
1481 memcpy(buf
, pvinfo
.hcall
, buf_len
);
1486 * Fallback to always fail hypercalls:
1502 void kvmppc_set_papr(PowerPCCPU
*cpu
)
1504 CPUState
*cs
= CPU(cpu
);
1507 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_PAPR
, 0);
1509 cpu_abort(cs
, "This KVM version does not support PAPR\n");
1512 /* Update the capability flag so we sync the right information
1517 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
1519 CPUState
*cs
= CPU(cpu
);
1522 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_EPR
, 0, mpic_proxy
);
1523 if (ret
&& mpic_proxy
) {
1524 cpu_abort(cs
, "This KVM version does not support EPR\n");
1528 int kvmppc_smt_threads(void)
1530 return cap_ppc_smt
? cap_ppc_smt
: 1;
1534 off_t
kvmppc_alloc_rma(const char *name
, MemoryRegion
*sysmem
)
1539 struct kvm_allocate_rma ret
;
1540 MemoryRegion
*rma_region
;
1542 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1543 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1544 * not necessary on this hardware
1545 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1547 * FIXME: We should allow the user to force contiguous RMA
1548 * allocation in the cap_ppc_rma==1 case.
1550 if (cap_ppc_rma
< 2) {
1554 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
1556 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1561 size
= MIN(ret
.rma_size
, 256ul << 20);
1563 rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1564 if (rma
== MAP_FAILED
) {
1565 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
1569 rma_region
= g_new(MemoryRegion
, 1);
1570 memory_region_init_ram_ptr(rma_region
, NULL
, name
, size
, rma
);
1571 vmstate_register_ram_global(rma_region
);
1572 memory_region_add_subregion(sysmem
, 0, rma_region
);
1577 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
1579 struct kvm_ppc_smmu_info info
;
1580 long rampagesize
, best_page_shift
;
1583 if (cap_ppc_rma
>= 2) {
1584 return current_size
;
1587 /* Find the largest hardware supported page size that's less than
1588 * or equal to the (logical) backing page size of guest RAM */
1589 kvm_get_smmu_info(POWERPC_CPU(first_cpu
), &info
);
1590 rampagesize
= getrampagesize();
1591 best_page_shift
= 0;
1593 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
1594 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
1596 if (!sps
->page_shift
) {
1600 if ((sps
->page_shift
> best_page_shift
)
1601 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
1602 best_page_shift
= sps
->page_shift
;
1606 return MIN(current_size
,
1607 1ULL << (best_page_shift
+ hash_shift
- 7));
1611 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
)
1613 struct kvm_create_spapr_tce args
= {
1615 .window_size
= window_size
,
1621 /* Must set fd to -1 so we don't try to munmap when called for
1622 * destroying the table, which the upper layers -will- do
1625 if (!cap_spapr_tce
) {
1629 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
1631 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
1636 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(uint64_t);
1637 /* FIXME: round this up to page size */
1639 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1640 if (table
== MAP_FAILED
) {
1641 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
1651 int kvmppc_remove_spapr_tce(void *table
, int fd
, uint32_t window_size
)
1659 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
)*sizeof(uint64_t);
1660 if ((munmap(table
, len
) < 0) ||
1662 fprintf(stderr
, "KVM: Unexpected error removing TCE table: %s",
1664 /* Leak the table */
1670 int kvmppc_reset_htab(int shift_hint
)
1672 uint32_t shift
= shift_hint
;
1674 if (!kvm_enabled()) {
1675 /* Full emulation, tell caller to allocate htab itself */
1678 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
1680 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
1681 if (ret
== -ENOTTY
) {
1682 /* At least some versions of PR KVM advertise the
1683 * capability, but don't implement the ioctl(). Oops.
1684 * Return 0 so that we allocate the htab in qemu, as is
1685 * correct for PR. */
1687 } else if (ret
< 0) {
1693 /* We have a kernel that predates the htab reset calls. For PR
1694 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1695 * this era, it has allocated a 16MB fixed size hash table
1696 * already. Kernels of this era have the GET_PVINFO capability
1697 * only on PR, so we use this hack to determine the right
1699 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
1700 /* PR - tell caller to allocate htab */
1703 /* HV - assume 16MB kernel allocated htab */
1708 static inline uint32_t mfpvr(void)
/* Set (@on true) or clear (@on false) the given flag bits in *@word.
 * NOTE(review): body not visible in this extract; reconstructed from
 * the callers below, which pass instruction-flag masks. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}
1726 static void kvmppc_host_cpu_initfn(Object
*obj
)
1728 assert(kvm_enabled());
1731 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
1733 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
1734 uint32_t vmx
= kvmppc_get_vmx();
1735 uint32_t dfp
= kvmppc_get_dfp();
1736 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
1737 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
1739 /* Now fix up the class with information we can query from the host */
1743 /* Only override when we know what the host supports */
1744 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
1745 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
1748 /* Only override when we know what the host supports */
1749 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
1752 if (dcache_size
!= -1) {
1753 pcc
->l1_dcache_size
= dcache_size
;
1756 if (icache_size
!= -1) {
1757 pcc
->l1_icache_size
= icache_size
;
1761 bool kvmppc_has_cap_epr(void)
1766 bool kvmppc_has_cap_htab_fd(void)
1771 static PowerPCCPUClass
*ppc_cpu_get_family_class(PowerPCCPUClass
*pcc
)
1773 ObjectClass
*oc
= OBJECT_CLASS(pcc
);
1775 while (oc
&& !object_class_is_abstract(oc
)) {
1776 oc
= object_class_get_parent(oc
);
1780 return POWERPC_CPU_CLASS(oc
);
1783 static int kvm_ppc_register_host_cpu_type(void)
1785 TypeInfo type_info
= {
1786 .name
= TYPE_HOST_POWERPC_CPU
,
1787 .instance_init
= kvmppc_host_cpu_initfn
,
1788 .class_init
= kvmppc_host_cpu_class_init
,
1790 uint32_t host_pvr
= mfpvr();
1791 PowerPCCPUClass
*pvr_pcc
;
1794 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
1795 if (pvr_pcc
== NULL
) {
1796 pvr_pcc
= ppc_cpu_class_by_pvr_mask(host_pvr
);
1798 if (pvr_pcc
== NULL
) {
1801 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
1802 type_register(&type_info
);
1804 /* Register generic family CPU class for a family */
1805 pvr_pcc
= ppc_cpu_get_family_class(pvr_pcc
);
1806 dc
= DEVICE_CLASS(pvr_pcc
);
1807 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
1808 type_info
.name
= g_strdup_printf("%s-"TYPE_POWERPC_CPU
, dc
->desc
);
1809 type_register(&type_info
);
1814 int kvmppc_define_rtas_kernel_token(uint32_t token
, const char *function
)
1816 struct kvm_rtas_token_args args
= {
1820 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_RTAS
)) {
1824 strncpy(args
.name
, function
, sizeof(args
.name
));
1826 return kvm_vm_ioctl(kvm_state
, KVM_PPC_RTAS_DEFINE_TOKEN
, &args
);
1829 int kvmppc_get_htab_fd(bool write
)
1831 struct kvm_get_htab_fd s
= {
1832 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
1837 fprintf(stderr
, "KVM version doesn't support saving the hash table\n");
1841 return kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
1844 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
1846 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
1847 uint8_t buf
[bufsize
];
1851 rc
= read(fd
, buf
, bufsize
);
1853 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
1857 /* Kernel already retuns data in BE format for the file */
1858 qemu_put_buffer(f
, buf
, rc
);
1862 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
1864 return (rc
== 0) ? 1 : 0;
1867 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
1868 uint16_t n_valid
, uint16_t n_invalid
)
1870 struct kvm_get_htab_header
*buf
;
1871 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
1874 buf
= alloca(chunksize
);
1875 /* This is KVM on ppc, so this is all big-endian */
1877 buf
->n_valid
= n_valid
;
1878 buf
->n_invalid
= n_invalid
;
1880 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
1882 rc
= write(fd
, buf
, chunksize
);
1884 fprintf(stderr
, "Error writing KVM hash table: %s\n",
1888 if (rc
!= chunksize
) {
1889 /* We should never get a short write on a single chunk */
1890 fprintf(stderr
, "Short write, restoring KVM hash table\n");
/* The following kvm_arch_* hooks are required by the generic KVM
 * layer; their bodies were not captured in this extract, so only the
 * signatures are visible.  NOTE(review): on ppc these are typically
 * trivial stubs (debug/breakpoint support unimplemented) — confirm
 * the return values against the repository before relying on them. */
1896 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
/* SIGBUS delivered while a specific vcpu was running. */
1901 int kvm_arch_on_sigbus_vcpu(CPUState
*cpu
, int code
, void *addr
)
/* SIGBUS delivered outside vcpu context. */
1906 int kvm_arch_on_sigbus(int code
, void *addr
)
/* Arch hook for in-kernel irqchip routing setup. */
1911 void kvm_arch_init_irq_routing(KVMState
*s
)
/* Software breakpoint insertion (guest debug). */
1915 int kvm_arch_insert_sw_breakpoint(CPUState
*cpu
, struct kvm_sw_breakpoint
*bp
)
/* Software breakpoint removal (guest debug). */
1920 int kvm_arch_remove_sw_breakpoint(CPUState
*cpu
, struct kvm_sw_breakpoint
*bp
)
/* Hardware breakpoint insertion (guest debug). */
1925 int kvm_arch_insert_hw_breakpoint(target_ulong addr
, target_ulong len
, int type
)
/* Hardware breakpoint removal (guest debug). */
1930 int kvm_arch_remove_hw_breakpoint(target_ulong addr
, target_ulong len
, int type
)
/* Drop all hardware breakpoints. */
1935 void kvm_arch_remove_all_hw_breakpoints(void)
/* Sync guest-debug state into the kvm_guest_debug control block. */
1939 void kvm_arch_update_guest_debug(CPUState
*cpu
, struct kvm_guest_debug
*dbg
)
/* Buffer used to read one hash PTE group through the KVM htab fd:
 * the kernel header followed by the PTE payload.  (Parts of this
 * definition were not captured in this extract.) */
1943 struct kvm_get_htab_buf
{
1944 struct kvm_get_htab_header header
;
1946 * We require one extra byte for read
1948 target_ulong hpte
[(HPTES_PER_GROUP
* 2) + 1];
1951 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU
*cpu
, target_ulong pte_index
)
1954 struct kvm_get_htab_fd ghf
;
1955 struct kvm_get_htab_buf
*hpte_buf
;
1958 ghf
.start_index
= pte_index
;
1959 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
1964 hpte_buf
= g_malloc0(sizeof(*hpte_buf
));
1966 * Read the hpte group
1968 if (read(htab_fd
, hpte_buf
, sizeof(*hpte_buf
)) < 0) {
1973 return (uint64_t)(uintptr_t) hpte_buf
->hpte
;
1982 void kvmppc_hash64_free_pteg(uint64_t token
)
1984 struct kvm_get_htab_buf
*htab_buf
;
1986 htab_buf
= container_of((void *)(uintptr_t) token
, struct kvm_get_htab_buf
,
1992 void kvmppc_hash64_write_pte(CPUPPCState
*env
, target_ulong pte_index
,
1993 target_ulong pte0
, target_ulong pte1
)
1996 struct kvm_get_htab_fd ghf
;
1997 struct kvm_get_htab_buf hpte_buf
;
2000 ghf
.start_index
= 0; /* Ignored */
2001 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2006 hpte_buf
.header
.n_valid
= 1;
2007 hpte_buf
.header
.n_invalid
= 0;
2008 hpte_buf
.header
.index
= pte_index
;
2009 hpte_buf
.hpte
[0] = pte0
;
2010 hpte_buf
.hpte
[1] = pte1
;
2012 * Write the hpte entry.
2013 * CAUTION: write() has the warn_unused_result attribute. Hence we
2014 * need to check the return value, even though we do nothing.
2016 if (write(htab_fd
, &hpte_buf
, sizeof(hpte_buf
)) < 0) {