2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu-timer.h"
32 #include "device_tree.h"
33 #include "hw/sysbus.h"
36 #include "hw/sysbus.h"
38 #include "hw/spapr_vio.h"
43 #define dprintf(fmt, ...) \
44 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
46 #define dprintf(fmt, ...) \
50 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
52 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
56 static int cap_interrupt_unset
= false;
57 static int cap_interrupt_level
= false;
58 static int cap_segstate
;
59 static int cap_booke_sregs
;
60 static int cap_ppc_smt
;
61 static int cap_ppc_rma
;
62 static int cap_spapr_tce
;
65 /* XXX We have a race condition where we actually have a level triggered
66 * interrupt, but the infrastructure can't expose that yet, so the guest
67 * takes but ignores it, goes to sleep and never gets notified that there's
68 * still an interrupt pending.
70 * As a quick workaround, let's just wake up again 20 ms after we injected
71 * an interrupt. That way we can assure that we're always reinjecting
72 * interrupts in case the guest swallowed them.
74 static QEMUTimer
*idle_timer
;
76 static void kvm_kick_env(void *env
)
81 int kvm_arch_init(KVMState
*s
)
83 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
84 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
85 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
86 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
87 cap_ppc_smt
= kvm_check_extension(s
, KVM_CAP_PPC_SMT
);
88 cap_ppc_rma
= kvm_check_extension(s
, KVM_CAP_PPC_RMA
);
89 cap_spapr_tce
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE
);
90 cap_hior
= kvm_check_extension(s
, KVM_CAP_PPC_HIOR
);
92 if (!cap_interrupt_level
) {
93 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
94 "VM to stall at times!\n");
100 static int kvm_arch_sync_sregs(CPUPPCState
*cenv
)
102 struct kvm_sregs sregs
;
105 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
106 /* What we're really trying to say is "if we're on BookE, we use
107 the native PVR for now". This is the only sane way to check
108 it though, so we potentially confuse users that they can run
109 BookE guests on BookS. Let's hope nobody dares enough :) */
113 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
118 ret
= kvm_vcpu_ioctl(cenv
, KVM_GET_SREGS
, &sregs
);
123 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
124 return kvm_vcpu_ioctl(cenv
, KVM_SET_SREGS
, &sregs
);
127 /* Set up a shared TLB array with KVM */
128 static int kvm_booke206_tlb_init(CPUPPCState
*env
)
130 struct kvm_book3e_206_tlb_params params
= {};
131 struct kvm_config_tlb cfg
= {};
132 struct kvm_enable_cap encap
= {};
133 unsigned int entries
= 0;
136 if (!kvm_enabled() ||
137 !kvm_check_extension(env
->kvm_state
, KVM_CAP_SW_TLB
)) {
141 assert(ARRAY_SIZE(params
.tlb_sizes
) == BOOKE206_MAX_TLBN
);
143 for (i
= 0; i
< BOOKE206_MAX_TLBN
; i
++) {
144 params
.tlb_sizes
[i
] = booke206_tlb_size(env
, i
);
145 params
.tlb_ways
[i
] = booke206_tlb_ways(env
, i
);
146 entries
+= params
.tlb_sizes
[i
];
149 assert(entries
== env
->nb_tlb
);
150 assert(sizeof(struct kvm_book3e_206_tlb_entry
) == sizeof(ppcmas_tlb_t
));
152 env
->tlb_dirty
= true;
154 cfg
.array
= (uintptr_t)env
->tlb
.tlbm
;
155 cfg
.array_len
= sizeof(ppcmas_tlb_t
) * entries
;
156 cfg
.params
= (uintptr_t)¶ms
;
157 cfg
.mmu_type
= KVM_MMU_FSL_BOOKE_NOHV
;
159 encap
.cap
= KVM_CAP_SW_TLB
;
160 encap
.args
[0] = (uintptr_t)&cfg
;
162 ret
= kvm_vcpu_ioctl(env
, KVM_ENABLE_CAP
, &encap
);
164 fprintf(stderr
, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
165 __func__
, strerror(-ret
));
169 env
->kvm_sw_tlb
= true;
174 #if defined(TARGET_PPC64)
175 static void kvm_get_fallback_smmu_info(CPUPPCState
*env
,
176 struct kvm_ppc_smmu_info
*info
)
178 memset(info
, 0, sizeof(*info
));
180 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
181 * need to "guess" what the supported page sizes are.
183 * For that to work we make a few assumptions:
185 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
186 * KVM which only supports 4K and 16M pages, but supports them
187 * regardless of the backing store characteristics. We also don't
188 * support 1T segments.
190 * This is safe as if HV KVM ever supports that capability or PR
191 * KVM grows support for more page/segment sizes, those versions
192 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
193 * will not hit this fallback
195 * - Else we are running HV KVM. This means we only support page
196 * sizes that fit in the backing store. Additionally we only
197 * advertize 64K pages if the processor is ARCH 2.06 and we assume
198 * P7 encodings for the SLB and hash table. Here too, we assume
199 * support for any newer processor will mean a kernel that
200 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
203 if (kvm_check_extension(env
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
208 /* Standard 4k base page size segment */
209 info
->sps
[0].page_shift
= 12;
210 info
->sps
[0].slb_enc
= 0;
211 info
->sps
[0].enc
[0].page_shift
= 12;
212 info
->sps
[0].enc
[0].pte_enc
= 0;
214 /* Standard 16M large page size segment */
215 info
->sps
[1].page_shift
= 24;
216 info
->sps
[1].slb_enc
= SLB_VSID_L
;
217 info
->sps
[1].enc
[0].page_shift
= 24;
218 info
->sps
[1].enc
[0].pte_enc
= 0;
222 /* HV KVM has backing store size restrictions */
223 info
->flags
= KVM_PPC_PAGE_SIZES_REAL
;
225 if (env
->mmu_model
& POWERPC_MMU_1TSEG
) {
226 info
->flags
|= KVM_PPC_1T_SEGMENTS
;
229 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
235 /* Standard 4k base page size segment */
236 info
->sps
[i
].page_shift
= 12;
237 info
->sps
[i
].slb_enc
= 0;
238 info
->sps
[i
].enc
[0].page_shift
= 12;
239 info
->sps
[i
].enc
[0].pte_enc
= 0;
242 /* 64K on MMU 2.06 */
243 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
244 info
->sps
[i
].page_shift
= 16;
245 info
->sps
[i
].slb_enc
= 0x110;
246 info
->sps
[i
].enc
[0].page_shift
= 16;
247 info
->sps
[i
].enc
[0].pte_enc
= 1;
251 /* Standard 16M large page size segment */
252 info
->sps
[i
].page_shift
= 24;
253 info
->sps
[i
].slb_enc
= SLB_VSID_L
;
254 info
->sps
[i
].enc
[0].page_shift
= 24;
255 info
->sps
[i
].enc
[0].pte_enc
= 0;
259 static void kvm_get_smmu_info(CPUPPCState
*env
, struct kvm_ppc_smmu_info
*info
)
263 if (kvm_check_extension(env
->kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
264 ret
= kvm_vm_ioctl(env
->kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
270 kvm_get_fallback_smmu_info(env
, info
);
273 static long getrampagesize(void)
279 /* guest RAM is backed by normal anonymous pages */
280 return getpagesize();
284 ret
= statfs(mem_path
, &fs
);
285 } while (ret
!= 0 && errno
== EINTR
);
288 fprintf(stderr
, "Couldn't statfs() memory path: %s\n",
293 #define HUGETLBFS_MAGIC 0x958458f6
295 if (fs
.f_type
!= HUGETLBFS_MAGIC
) {
296 /* Explicit mempath, but it's ordinary pages */
297 return getpagesize();
300 /* It's hugepage, return the huge page size */
304 static bool kvm_valid_page_size(uint32_t flags
, long rampgsize
, uint32_t shift
)
306 if (!(flags
& KVM_PPC_PAGE_SIZES_REAL
)) {
310 return (1ul << shift
) <= rampgsize
;
313 static void kvm_fixup_page_sizes(CPUPPCState
*env
)
315 static struct kvm_ppc_smmu_info smmu_info
;
316 static bool has_smmu_info
;
320 /* We only handle page sizes for 64-bit server guests for now */
321 if (!(env
->mmu_model
& POWERPC_MMU_64
)) {
325 /* Collect MMU info from kernel if not already */
326 if (!has_smmu_info
) {
327 kvm_get_smmu_info(env
, &smmu_info
);
328 has_smmu_info
= true;
331 rampagesize
= getrampagesize();
333 /* Convert to QEMU form */
334 memset(&env
->sps
, 0, sizeof(env
->sps
));
336 for (ik
= iq
= 0; ik
< KVM_PPC_PAGE_SIZES_MAX_SZ
; ik
++) {
337 struct ppc_one_seg_page_size
*qsps
= &env
->sps
.sps
[iq
];
338 struct kvm_ppc_one_seg_page_size
*ksps
= &smmu_info
.sps
[ik
];
340 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
344 qsps
->page_shift
= ksps
->page_shift
;
345 qsps
->slb_enc
= ksps
->slb_enc
;
346 for (jk
= jq
= 0; jk
< KVM_PPC_PAGE_SIZES_MAX_SZ
; jk
++) {
347 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
348 ksps
->enc
[jk
].page_shift
)) {
351 qsps
->enc
[jq
].page_shift
= ksps
->enc
[jk
].page_shift
;
352 qsps
->enc
[jq
].pte_enc
= ksps
->enc
[jk
].pte_enc
;
353 if (++jq
>= PPC_PAGE_SIZES_MAX_SZ
) {
357 if (++iq
>= PPC_PAGE_SIZES_MAX_SZ
) {
361 env
->slb_nr
= smmu_info
.slb_size
;
362 if (smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
) {
363 env
->mmu_model
|= POWERPC_MMU_1TSEG
;
365 env
->mmu_model
&= ~POWERPC_MMU_1TSEG
;
368 #else /* defined (TARGET_PPC64) */
370 static inline void kvm_fixup_page_sizes(CPUPPCState
*env
)
374 #endif /* !defined (TARGET_PPC64) */
376 int kvm_arch_init_vcpu(CPUPPCState
*cenv
)
380 /* Gather server mmu info from KVM and update the CPU state */
381 kvm_fixup_page_sizes(cenv
);
383 /* Synchronize sregs with kvm */
384 ret
= kvm_arch_sync_sregs(cenv
);
389 idle_timer
= qemu_new_timer_ns(vm_clock
, kvm_kick_env
, cenv
);
391 /* Some targets support access to KVM's guest TLB. */
392 switch (cenv
->mmu_model
) {
393 case POWERPC_MMU_BOOKE206
:
394 ret
= kvm_booke206_tlb_init(cenv
);
403 void kvm_arch_reset_vcpu(CPUPPCState
*env
)
407 static void kvm_sw_tlb_put(CPUPPCState
*env
)
409 struct kvm_dirty_tlb dirty_tlb
;
410 unsigned char *bitmap
;
413 if (!env
->kvm_sw_tlb
) {
417 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
418 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
420 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
421 dirty_tlb
.num_dirty
= env
->nb_tlb
;
423 ret
= kvm_vcpu_ioctl(env
, KVM_DIRTY_TLB
, &dirty_tlb
);
425 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
426 __func__
, strerror(-ret
));
432 int kvm_arch_put_registers(CPUPPCState
*env
, int level
)
434 struct kvm_regs regs
;
438 ret
= kvm_vcpu_ioctl(env
, KVM_GET_REGS
, ®s
);
448 regs
.srr0
= env
->spr
[SPR_SRR0
];
449 regs
.srr1
= env
->spr
[SPR_SRR1
];
451 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
452 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
453 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
454 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
455 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
456 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
457 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
458 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
460 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
462 for (i
= 0;i
< 32; i
++)
463 regs
.gpr
[i
] = env
->gpr
[i
];
465 ret
= kvm_vcpu_ioctl(env
, KVM_SET_REGS
, ®s
);
469 if (env
->tlb_dirty
) {
471 env
->tlb_dirty
= false;
474 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
475 struct kvm_sregs sregs
;
477 sregs
.pvr
= env
->spr
[SPR_PVR
];
479 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
483 for (i
= 0; i
< 64; i
++) {
484 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
485 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
490 for (i
= 0; i
< 16; i
++) {
491 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
495 for (i
= 0; i
< 8; i
++) {
496 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[1][i
] << 32)
498 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[1][i
] << 32)
502 ret
= kvm_vcpu_ioctl(env
, KVM_SET_SREGS
, &sregs
);
508 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
509 uint64_t hior
= env
->spr
[SPR_HIOR
];
510 struct kvm_one_reg reg
= {
511 .id
= KVM_REG_PPC_HIOR
,
512 .addr
= (uintptr_t) &hior
,
515 ret
= kvm_vcpu_ioctl(env
, KVM_SET_ONE_REG
, ®
);
524 int kvm_arch_get_registers(CPUPPCState
*env
)
526 struct kvm_regs regs
;
527 struct kvm_sregs sregs
;
531 ret
= kvm_vcpu_ioctl(env
, KVM_GET_REGS
, ®s
);
536 for (i
= 7; i
>= 0; i
--) {
537 env
->crf
[i
] = cr
& 15;
547 env
->spr
[SPR_SRR0
] = regs
.srr0
;
548 env
->spr
[SPR_SRR1
] = regs
.srr1
;
550 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
551 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
552 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
553 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
554 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
555 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
556 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
557 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
559 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
561 for (i
= 0;i
< 32; i
++)
562 env
->gpr
[i
] = regs
.gpr
[i
];
564 if (cap_booke_sregs
) {
565 ret
= kvm_vcpu_ioctl(env
, KVM_GET_SREGS
, &sregs
);
570 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
571 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
572 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
573 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
574 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
575 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
576 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
577 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
578 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
579 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
580 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
581 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
584 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
585 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
586 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
587 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
588 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
589 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
592 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
593 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
596 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
597 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
600 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
601 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
602 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
603 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
604 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
605 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
606 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
607 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
608 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
609 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
610 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
611 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
612 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
613 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
614 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
615 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
616 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
618 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
619 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
620 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
621 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
624 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
625 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
628 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
629 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
630 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
634 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
635 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
636 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
637 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
638 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
639 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
640 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
641 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
642 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
643 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
644 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
647 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
648 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
651 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
652 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
653 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
656 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
657 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
658 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
659 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
661 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
662 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
663 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
669 ret
= kvm_vcpu_ioctl(env
, KVM_GET_SREGS
, &sregs
);
674 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
678 for (i
= 0; i
< 64; i
++) {
679 ppc_store_slb(env
, sregs
.u
.s
.ppc64
.slb
[i
].slbe
,
680 sregs
.u
.s
.ppc64
.slb
[i
].slbv
);
685 for (i
= 0; i
< 16; i
++) {
686 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
690 for (i
= 0; i
< 8; i
++) {
691 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
692 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
693 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
694 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
701 int kvmppc_set_interrupt(CPUPPCState
*env
, int irq
, int level
)
703 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
705 if (irq
!= PPC_INTERRUPT_EXT
) {
709 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
713 kvm_vcpu_ioctl(env
, KVM_INTERRUPT
, &virq
);
718 #if defined(TARGET_PPCEMB)
719 #define PPC_INPUT_INT PPC40x_INPUT_INT
720 #elif defined(TARGET_PPC64)
721 #define PPC_INPUT_INT PPC970_INPUT_INT
723 #define PPC_INPUT_INT PPC6xx_INPUT_INT
726 void kvm_arch_pre_run(CPUPPCState
*env
, struct kvm_run
*run
)
731 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
732 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
733 if (!cap_interrupt_level
&&
734 run
->ready_for_interrupt_injection
&&
735 (env
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
736 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
738 /* For now KVM disregards the 'irq' argument. However, in the
739 * future KVM could cache it in-kernel to avoid a heavyweight exit
740 * when reading the UIC.
742 irq
= KVM_INTERRUPT_SET
;
744 dprintf("injected interrupt %d\n", irq
);
745 r
= kvm_vcpu_ioctl(env
, KVM_INTERRUPT
, &irq
);
747 printf("cpu %d fail inject %x\n", env
->cpu_index
, irq
);
749 /* Always wake up soon in case the interrupt was level based */
750 qemu_mod_timer(idle_timer
, qemu_get_clock_ns(vm_clock
) +
751 (get_ticks_per_sec() / 50));
754 /* We don't know if there are more interrupts pending after this. However,
755 * the guest will return to userspace in the course of handling this one
756 * anyways, so we will get a chance to deliver the rest. */
759 void kvm_arch_post_run(CPUPPCState
*env
, struct kvm_run
*run
)
763 int kvm_arch_process_async_events(CPUPPCState
*env
)
768 static int kvmppc_handle_halt(CPUPPCState
*env
)
770 if (!(env
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
772 env
->exception_index
= EXCP_HLT
;
778 /* map dcr access to existing qemu dcr emulation */
779 static int kvmppc_handle_dcr_read(CPUPPCState
*env
, uint32_t dcrn
, uint32_t *data
)
781 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
782 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
787 static int kvmppc_handle_dcr_write(CPUPPCState
*env
, uint32_t dcrn
, uint32_t data
)
789 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
790 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
795 int kvm_arch_handle_exit(CPUPPCState
*env
, struct kvm_run
*run
)
799 switch (run
->exit_reason
) {
801 if (run
->dcr
.is_write
) {
802 dprintf("handle dcr write\n");
803 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
805 dprintf("handle dcr read\n");
806 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
810 dprintf("handle halt\n");
811 ret
= kvmppc_handle_halt(env
);
813 #ifdef CONFIG_PSERIES
814 case KVM_EXIT_PAPR_HCALL
:
815 dprintf("handle PAPR hypercall\n");
816 run
->papr_hcall
.ret
= spapr_hypercall(env
, run
->papr_hcall
.nr
,
817 run
->papr_hcall
.args
);
822 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy the
 * whole line (field name included) into @value.
 *
 * @field: prefix to look for at the start of a line
 * @value: destination buffer
 * @len:   size of @value in bytes; the copy is truncated to fit and is
 *         always NUL-terminated.  Nothing is written when @len <= 0.
 *
 * Returns 0 when a matching line was found, -1 when /proc/cpuinfo could not
 * be opened or no line matched.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    const size_t field_len = strlen(field);
    int ret = -1;
    FILE *f;

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    while (fgets(line, sizeof(line), f)) {
        if (!strncmp(line, field, field_len)) {
            /* strncpy() would leave @value unterminated when the line does
             * not fit, and would treat a negative @len as a huge size. */
            if (len > 0) {
                snprintf(value, len, "%s", line);
            }
            ret = 0;
            break;
        }
        /* Stop scanning on an empty line */
        if (!*line) {
            break;
        }
    }

    fclose(f);

    return ret;
}
858 uint32_t kvmppc_get_tbfreq(void)
862 uint32_t retval
= get_ticks_per_sec();
864 if (read_cpuinfo("timebase", line
, sizeof(line
))) {
868 if (!(ns
= strchr(line
, ':'))) {
878 /* Try to find a device tree node for a CPU with clock-frequency property */
879 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
884 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
885 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
890 while ((dirp
= readdir(dp
)) != NULL
) {
892 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
896 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
903 if (buf
[0] == '\0') {
904 printf("Unknown host!\n");
911 /* Read a CPU node property from the host device tree that's a single
912 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
913 * (can't find or open the property, or doesn't understand the
915 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
925 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
929 strncat(buf
, "/", sizeof(buf
) - strlen(buf
));
930 strncat(buf
, propname
, sizeof(buf
) - strlen(buf
));
932 f
= fopen(buf
, "rb");
937 len
= fread(&u
, 1, sizeof(u
), f
);
941 /* property is a 32-bit quantity */
942 return be32_to_cpu(u
.v32
);
944 return be64_to_cpu(u
.v64
);
/* Value of the "clock-frequency" property of the host CPU device tree node,
 * as read by kvmppc_read_int_cpu_dt(). */
uint64_t kvmppc_get_clockfreq(void)
{
    const uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
/* Value of the "ibm,vmx" property of the host CPU device tree node, as read
 * by kvmppc_read_int_cpu_dt() and narrowed to 32 bits. */
uint32_t kvmppc_get_vmx(void)
{
    const uint64_t prop = kvmppc_read_int_cpu_dt("ibm,vmx");

    return (uint32_t)prop;
}
/* Value of the "ibm,dfp" property of the host CPU device tree node, as read
 * by kvmppc_read_int_cpu_dt() and narrowed to 32 bits. */
uint32_t kvmppc_get_dfp(void)
{
    const uint64_t prop = kvmppc_read_int_cpu_dt("ibm,dfp");

    return (uint32_t)prop;
}
965 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
967 uint32_t *hc
= (uint32_t*)buf
;
969 struct kvm_ppc_pvinfo pvinfo
;
971 if (kvm_check_extension(env
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
972 !kvm_vm_ioctl(env
->kvm_state
, KVM_PPC_GET_PVINFO
, &pvinfo
)) {
973 memcpy(buf
, pvinfo
.hcall
, buf_len
);
979 * Fallback to always fail hypercalls:
995 void kvmppc_set_papr(CPUPPCState
*env
)
997 struct kvm_enable_cap cap
= {};
1000 cap
.cap
= KVM_CAP_PPC_PAPR
;
1001 ret
= kvm_vcpu_ioctl(env
, KVM_ENABLE_CAP
, &cap
);
1004 cpu_abort(env
, "This KVM version does not support PAPR\n");
1008 int kvmppc_smt_threads(void)
1010 return cap_ppc_smt
? cap_ppc_smt
: 1;
1014 off_t
kvmppc_alloc_rma(const char *name
, MemoryRegion
*sysmem
)
1019 struct kvm_allocate_rma ret
;
1020 MemoryRegion
*rma_region
;
1022 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1023 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1024 * not necessary on this hardware
1025 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1027 * FIXME: We should allow the user to force contiguous RMA
1028 * allocation in the cap_ppc_rma==1 case.
1030 if (cap_ppc_rma
< 2) {
1034 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
1036 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1041 size
= MIN(ret
.rma_size
, 256ul << 20);
1043 rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1044 if (rma
== MAP_FAILED
) {
1045 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
1049 rma_region
= g_new(MemoryRegion
, 1);
1050 memory_region_init_ram_ptr(rma_region
, name
, size
, rma
);
1051 vmstate_register_ram_global(rma_region
);
1052 memory_region_add_subregion(sysmem
, 0, rma_region
);
1057 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
1059 if (cap_ppc_rma
>= 2) {
1060 return current_size
;
1062 return MIN(current_size
,
1063 getrampagesize() << (hash_shift
- 7));
1067 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
)
1069 struct kvm_create_spapr_tce args
= {
1071 .window_size
= window_size
,
1077 /* Must set fd to -1 so we don't try to munmap when called for
1078 * destroying the table, which the upper layers -will- do
1081 if (!cap_spapr_tce
) {
1085 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
1087 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
1092 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(sPAPRTCE
);
1093 /* FIXME: round this up to page size */
1095 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1096 if (table
== MAP_FAILED
) {
1097 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
1107 int kvmppc_remove_spapr_tce(void *table
, int fd
, uint32_t window_size
)
1115 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
)*sizeof(sPAPRTCE
);
1116 if ((munmap(table
, len
) < 0) ||
1118 fprintf(stderr
, "KVM: Unexpected error removing TCE table: %s",
1120 /* Leak the table */
1126 int kvmppc_reset_htab(int shift_hint
)
1128 uint32_t shift
= shift_hint
;
1130 if (!kvm_enabled()) {
1131 /* Full emulation, tell caller to allocate htab itself */
1134 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
1136 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
1137 if (ret
== -ENOTTY
) {
1138 /* At least some versions of PR KVM advertise the
1139 * capability, but don't implement the ioctl(). Oops.
1140 * Return 0 so that we allocate the htab in qemu, as is
1141 * correct for PR. */
1143 } else if (ret
< 0) {
1149 /* We have a kernel that predates the htab reset calls. For PR
1150 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1151 * this era, it has allocated a 16MB fixed size hash table
1152 * already. Kernels of this era have the GET_PVINFO capability
1153 * only on PR, so we use this hack to determine the right
1155 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
1156 /* PR - tell caller to allocate htab */
1159 /* HV - assume 16MB kernel allocated htab */
1164 static inline uint32_t mfpvr(void)
1173 static void alter_insns(uint64_t *word
, uint64_t flags
, bool on
)
1182 const ppc_def_t
*kvmppc_host_cpu_def(void)
1184 uint32_t host_pvr
= mfpvr();
1185 const ppc_def_t
*base_spec
;
1187 uint32_t vmx
= kvmppc_get_vmx();
1188 uint32_t dfp
= kvmppc_get_dfp();
1190 base_spec
= ppc_find_by_pvr(host_pvr
);
1192 spec
= g_malloc0(sizeof(*spec
));
1193 memcpy(spec
, base_spec
, sizeof(*spec
));
1195 /* Now fix up the spec with information we can query from the host */
1198 /* Only override when we know what the host supports */
1199 alter_insns(&spec
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
1200 alter_insns(&spec
->insns_flags2
, PPC2_VSX
, vmx
> 1);
1203 /* Only override when we know what the host supports */
1204 alter_insns(&spec
->insns_flags2
, PPC2_DFP
, dfp
);
1210 int kvmppc_fixup_cpu(CPUPPCState
*env
)
1214 /* Adjust cpu index for SMT */
1215 smt
= kvmppc_smt_threads();
1216 env
->cpu_index
= (env
->cpu_index
/ smp_threads
) * smt
1217 + (env
->cpu_index
% smp_threads
);
1223 bool kvm_arch_stop_on_emulation_error(CPUPPCState
*env
)
1228 int kvm_arch_on_sigbus_vcpu(CPUPPCState
*env
, int code
, void *addr
)
1233 int kvm_arch_on_sigbus(int code
, void *addr
)