/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu-timer.h"
#include "sysemu.h"
#include "kvm.h"
#include "kvm_ppc.h"
#include "cpu.h"
#include "cpus.h"
#include "device_tree.h"
#include "hw/sysbus.h"
#include "hw/spapr.h"
#include "hw/spapr_vio.h"

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_hior;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

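/* The timer is re-armed in kvm_arch_pre_run() each time an interrupt is
 * injected; its callback simply kicks the vCPU (below) so any swallowed
 * interrupt gets re-evaluated.
 */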
static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}

int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    return 0;
}

static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users that they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    struct kvm_enable_cap encap = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    encap.cap = KVM_CAP_SW_TLB;
    encap.args[0] = (uintptr_t)&cfg;

    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}

#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(CPUPPCState *env,
                                       struct kvm_ppc_smmu_info *info)
{
    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 */
        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}

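/* Ask the kernel for its view of the supported segment and page sizes;
 * on kernels without KVM_PPC_GET_SMMU_INFO, fall back to the guess above.
 */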
static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
{
    int ret;

    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(env, info);
}

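/* Size of the pages backing guest RAM: the hugepage size when mem_path
 * points at a hugetlbfs mount, the normal host page size otherwise.
 */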
static long getrampagesize(void)
{
    struct statfs fs;
    int ret;

    if (!mem_path) {
        /* guest RAM is backed by normal anonymous pages */
        return getpagesize();
    }

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}

static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}

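/* Filter the kernel-reported (or guessed) segment/page sizes down to what
 * the backing store can provide, and store the result in env->sps.  For
 * example, with KVM_PPC_PAGE_SIZES_REAL set and guest RAM backed by 4k
 * pages, the 64k and 16M encodings fail kvm_valid_page_size() above and
 * are dropped from the table.
 */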
static void kvm_fixup_page_sizes(CPUPPCState *env)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(env, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
        env->mmu_model |= POWERPC_MMU_1TSEG;
    } else {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}

#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(CPUPPCState *env)
{
}

#endif /* !defined (TARGET_PPC64) */

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cenv);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        return ret;
    }

    idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);
        break;
    default:
        break;
    }

    return ret;
}

void kvm_arch_reset_vcpu(CPUState *cpu)
{
}

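/* Push QEMU's copy of the shared TLB array back into KVM.  The bitmap
 * marks every entry dirty (all bits set), so the kernel reloads the
 * whole array rather than individual entries.
 */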
static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}

int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr  = env->lr;
    regs.xer = env->xer;
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            /* Beware. We have to swap upper and lower bits here */
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
                                       | env->DBAT[1][i];
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
                                       | env->IBAT[1][i];
        }

        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
        if (ret) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        uint64_t hior = env->spr[SPR_HIOR];
        struct kvm_one_reg reg = {
            .id = KVM_REG_PPC_HIOR,
            .addr = (uintptr_t) &hior,
        };

        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    return ret;
}

int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    env->xer = regs.xer;
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        ppc_store_sdr1(env, sregs.u.s.sdr1);

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
                               sregs.u.s.ppc64.slb[i].slbv);
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    return 0;
}

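/* Raise or lower the guest's external interrupt pin via the KVM_INTERRUPT
 * ioctl.  Only PPC_INTERRUPT_EXT is routed this way, and only when the
 * kernel can both set and unset level-triggered interrupts.
 */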
int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}

#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1<<PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        dprintf("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", env->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
                       (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */
}

void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
}

int kvm_arch_process_async_events(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    return cpu->env.halted;
}

static int kvmppc_handle_halt(CPUPPCState *env)
{
    if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        env->halted = 1;
        env->exception_index = EXCP_HLT;
    }

    return 0;
}

/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

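/* Dispatch the KVM exit reasons handled in userspace: DCR accesses,
 * halts and, on pseries machines, PAPR hypercalls.
 */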
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            dprintf("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            dprintf("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        dprintf("handle halt\n");
        ret = kvmppc_handle_halt(env);
        break;
#ifdef CONFIG_PSERIES
    case KVM_EXIT_PAPR_HCALL:
        dprintf("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}

static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}

uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}

/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}

/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
 * (can't find or open the property, or doesn't understand the
 * format) */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX];
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    strncat(buf, "/", sizeof(buf) - strlen(buf));
    strncat(buf, propname, sizeof(buf) - strlen(buf));

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}

uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}

int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t*)buf;

    struct kvm_ppc_pvinfo pvinfo;

    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);

        return 0;
    }

    /*
     * Fallback to always fail hypercalls:
     *
     *     li r3, -1
     *     nop
     *     nop
     *     nop
     */

    hc[0] = 0x3860ffff;
    hc[1] = 0x60000000;
    hc[2] = 0x60000000;
    hc[3] = 0x60000000;

    return 0;
}

void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_PAPR;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret) {
        cpu_abort(env, "This KVM version does not support PAPR\n");
    }
}

int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}

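/* Allocate a contiguous Real Mode Area from the kernel, map it at guest
 * address 0 and register it as RAM.  Returns the size actually used, 0
 * when no kernel-allocated RMA is needed, or -1 on failure.
 */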
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, name, size, rma);
    vmstate_register_ram_global(rma_region);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}

uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    if (cap_ppc_rma >= 2) {
        return current_size;
    }
    return MIN(current_size,
               getrampagesize() << (hash_shift - 7));
}

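/* Create an in-kernel TCE (DMA translation) table for a sPAPR VIO device
 * and mmap it into QEMU's address space, so H_PUT_TCE can be handled in
 * the kernel while QEMU can still read the table for device DMA.
 * Returns NULL (with *pfd == -1) when the table must live in userspace.
 */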
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

, int fd
, uint32_t window_size
)
1139 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
)*sizeof(sPAPRTCE
);
1140 if ((munmap(table
, len
) < 0) ||
1142 fprintf(stderr
, "KVM: Unexpected error removing TCE table: %s",
1144 /* Leak the table */
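/* (Re)allocate the guest hash page table in the kernel where possible.
 * Returns the log2 size of the kernel-allocated table, or 0 when the
 * caller should allocate the htab in userspace instead.
 */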
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table
     * already.  Kernels of this era have the GET_PVINFO capability
     * only on PR, so we use this hack to determine the right
     * answer */
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

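/* Set or clear a group of instruction-flag bits in a CPU spec; used below
 * to make the advertised features mirror what the host really supports.
 */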
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

const ppc_def_t *kvmppc_host_cpu_def(void)
{
    uint32_t host_pvr = mfpvr();
    const ppc_def_t *base_spec;
    ppc_def_t *spec;
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();

    base_spec = ppc_find_by_pvr(host_pvr);

    spec = g_malloc0(sizeof(*spec));
    memcpy(spec, base_spec, sizeof(*spec));

    /* Now fix up the spec with information we can query from the host */

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
    }

    return spec;
}

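/* Spread vCPU indices so each guest core occupies a full host core's
 * worth of thread slots.  For example, with smp_threads == 2 on an SMT-4
 * host, vCPUs 0,1,2,3 get indices 0,1,4,5.
 */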
int kvmppc_fixup_cpu(CPUPPCState *env)
{
    int smt;

    /* Adjust cpu index for SMT */
    smt = kvmppc_smt_threads();
    env->cpu_index = (env->cpu_index / smp_threads) * smt
                     + (env->cpu_index % smp_threads);

    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return 1;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}