2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qemu-timer.h"
30 #include "device_tree.h"
32 #include "hw/sysbus.h"
34 #include "hw/spapr_vio.h"
39 #define dprintf(fmt, ...) \
40 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
42 #define dprintf(fmt, ...) \
46 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
48 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
/*
 * Cached results of kvm_check_extension() for the PPC capabilities used in
 * this file; kvm_arch_init() assigns all four.
 */
52 static int cap_interrupt_unset
= false;
53 static int cap_interrupt_level
= false;
54 static int cap_segstate
;
55 static int cap_booke_sregs
;
57 /* XXX We have a race condition where we actually have a level triggered
58 * interrupt, but the infrastructure can't expose that yet, so the guest
59 * takes but ignores it, goes to sleep and never gets notified that there's
60 * still an interrupt pending.
62 * As a quick workaround, let's just wake up again 20 ms after we injected
63 * an interrupt. That way we can assure that we're always reinjecting
64 * interrupts in case the guest swallowed them.
66 static QEMUTimer
*idle_timer
;
/*
 * Callback handed to qemu_new_timer_ns() when kvm_arch_init_vcpu() creates
 * idle_timer. The opaque argument is the CPUState pointer supplied at timer
 * creation.
 * NOTE(review): the body is not visible in this view.
 */
68 static void kvm_kick_env(void *env
)
/*
 * Probe the PPC-specific KVM capabilities (UNSET_IRQ, IRQ_LEVEL, SEGSTATE,
 * BOOKE_SREGS) with kvm_check_extension() and cache the answers in the
 * file-scope cap_* flags. A warning is printed to stderr when the level-irq
 * capability is missing.
 */
73 int kvm_arch_init(KVMState
*s
)
75 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
76 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
77 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
78 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
80 if (!cap_interrupt_level
) {
81 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
82 "VM to stall at times!\n");
/*
 * Push the guest PVR to KVM: fetch the current sregs with KVM_GET_SREGS,
 * overwrite sregs.pvr with cenv->spr[SPR_PVR] and write the structure back
 * with KVM_SET_SREGS, whose result is returned.
 * The BookE exception model is special-cased first, and a missing
 * capability is reported on stderr.
 * NOTE(review): the statements on those two paths are not visible here.
 */
88 static int kvm_arch_sync_sregs(CPUState
*cenv
)
90 struct kvm_sregs sregs
;
93 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
94 /* What we're really trying to say is "if we're on BookE, we use
95 the native PVR for now". This is the only sane way to check
96 it though, so we potentially confuse users that they can run
97 BookE guests on BookS. Let's hope nobody dares enough :) */
101 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
106 ret
= kvm_vcpu_ioctl(cenv
, KVM_GET_SREGS
, &sregs
);
111 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
112 return kvm_vcpu_ioctl(cenv
, KVM_SET_SREGS
, &sregs
);
/*
 * Per-vcpu setup: sync the sregs through kvm_arch_sync_sregs() and create
 * idle_timer on vm_clock, with kvm_kick_env as the callback and cenv as its
 * argument.
 * NOTE(review): idle_timer is a single file-scope pointer that is assigned
 * here -- confirm the intended behaviour when several vcpus are initialised.
 */
115 int kvm_arch_init_vcpu(CPUState
*cenv
)
119 ret
= kvm_arch_sync_sregs(cenv
);
124 idle_timer
= qemu_new_timer_ns(vm_clock
, kvm_kick_env
, cenv
);
/*
 * Arch hook for vcpu reset.
 * NOTE(review): no statements of the body are visible in this view.
 */
129 void kvm_arch_reset_vcpu(CPUState
*env
)
/*
 * Copy register state from env into KVM.
 *
 * The current struct kvm_regs is fetched with KVM_GET_REGS, then srr0/srr1,
 * sprg0..sprg7, pid and the 32 GPRs are overwritten from env and the
 * structure is written back with KVM_SET_REGS.
 * NOTE(review): 'level' is not referenced by any statement visible here.
 */
133 int kvm_arch_put_registers(CPUState
*env
, int level
)
135 struct kvm_regs regs
;
139 ret
= kvm_vcpu_ioctl(env
, KVM_GET_REGS
, ®s
);
149 regs
.srr0
= env
->spr
[SPR_SRR0
];
150 regs
.srr1
= env
->spr
[SPR_SRR1
];
152 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
153 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
154 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
155 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
156 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
157 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
158 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
159 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
161 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
163 for (i
= 0;i
< 32; i
++)
164 regs
.gpr
[i
] = env
->gpr
[i
];
166 ret
= kvm_vcpu_ioctl(env
, KVM_SET_REGS
, ®s
);
/*
 * Copy register state from KVM into env.
 *
 * KVM_GET_REGS supplies the condition register fields, srr0/srr1,
 * sprg0..sprg7, pid and the 32 GPRs. When cap_booke_sregs is set,
 * KVM_GET_SREGS supplies the BookE SPRs, each group guarded by its own bit
 * in sregs.u.e.features. A further KVM_GET_SREGS supplies SDR1, the SLB,
 * the segment registers and the BATs.
 */
173 int kvm_arch_get_registers(CPUState
*env
)
175 struct kvm_regs regs
;
176 struct kvm_sregs sregs
;
180 ret
= kvm_vcpu_ioctl(env
, KVM_GET_REGS
, ®s
);
/* Each crf[i] takes the low four bits of cr, with i running from 7 to 0 */
185 for (i
= 7; i
>= 0; i
--) {
186 env
->crf
[i
] = cr
& 15;
196 env
->spr
[SPR_SRR0
] = regs
.srr0
;
197 env
->spr
[SPR_SRR1
] = regs
.srr1
;
199 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
200 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
201 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
202 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
203 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
204 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
205 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
206 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
208 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
210 for (i
= 0;i
< 32; i
++)
211 env
->gpr
[i
] = regs
.gpr
[i
];
/* BookE sregs: every register group below is gated on a feature bit */
213 if (cap_booke_sregs
) {
214 ret
= kvm_vcpu_ioctl(env
, KVM_GET_SREGS
, &sregs
);
219 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
220 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
221 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
222 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
223 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
224 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
225 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
226 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
227 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
/* The 64-bit tb value is split: low word to TBL, high word to TBU */
228 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
229 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
230 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
233 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
234 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
235 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
236 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
237 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
238 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
241 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
242 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
245 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
246 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
/* IVOR0..15 come from ivor_low[], IVOR32..37 from ivor_high[] */
249 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
250 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
251 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
252 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
253 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
254 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
255 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
256 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
257 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
258 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
259 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
260 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
261 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
262 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
263 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
264 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
265 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
267 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
268 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
269 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
270 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
273 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
274 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
277 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
278 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
279 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
/* MMU assist registers; mas7_3 is split into MAS3 (low) and MAS7 (high) */
283 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
284 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
285 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
286 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
287 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
288 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
289 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
290 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
291 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
292 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
293 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
296 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
297 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
300 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
301 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
302 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
/* Registers taken from sregs.u.e.impl.fsl when impl_id is KVM_SREGS_E_IMPL_FSL */
305 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
306 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
307 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
308 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
310 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
311 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
312 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
/* KVM_GET_SREGS again, this time for SDR1, SLB, segment registers and BATs */
318 ret
= kvm_vcpu_ioctl(env
, KVM_GET_SREGS
, &sregs
);
323 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
327 for (i
= 0; i
< 64; i
++) {
328 ppc_store_slb(env
, sregs
.u
.s
.ppc64
.slb
[i
].slbe
,
329 sregs
.u
.s
.ppc64
.slb
[i
].slbv
);
334 for (i
= 0; i
< 16; i
++) {
335 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
/* Each 64-bit BAT value is split: low word to [0][i], high word to [1][i] */
339 for (i
= 0; i
< 8; i
++) {
340 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
341 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
342 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
343 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
/*
 * Forward a change of the external interrupt line to KVM.
 *
 * The KVM_INTERRUPT argument is KVM_INTERRUPT_SET_LEVEL when 'level' is
 * non-zero and KVM_INTERRUPT_UNSET otherwise. Two guards precede the ioctl:
 * irq != PPC_INTERRUPT_EXT, and KVM being disabled or either of
 * cap_interrupt_unset / cap_interrupt_level being clear.
 * NOTE(review): the result of the KVM_INTERRUPT ioctl is not stored.
 */
350 int kvmppc_set_interrupt(CPUState
*env
, int irq
, int level
)
352 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
354 if (irq
!= PPC_INTERRUPT_EXT
) {
358 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
362 kvm_vcpu_ioctl(env
, KVM_INTERRUPT
, &virq
);
/*
 * Per-target choice of the interrupt input pin number; kvm_arch_pre_run()
 * tests bit PPC_INPUT_INT of env->irq_input_state.
 */
367 #if defined(TARGET_PPCEMB)
368 #define PPC_INPUT_INT PPC40x_INPUT_INT
369 #elif defined(TARGET_PPC64)
370 #define PPC_INPUT_INT PPC970_INPUT_INT
372 #define PPC_INPUT_INT PPC6xx_INPUT_INT
/*
 * Pre-run hook: when the level-irq capability is absent, inject a pending
 * external interrupt by hand.
 *
 * Injection requires all of: cap_interrupt_level clear,
 * run->ready_for_interrupt_injection set, CPU_INTERRUPT_HARD set in
 * env->interrupt_request, and bit PPC_INPUT_INT set in
 * env->irq_input_state. KVM_INTERRUPT_SET is then sent with the
 * KVM_INTERRUPT ioctl, and idle_timer is re-armed on vm_clock for
 * get_ticks_per_sec() / 50 after the current time.
 */
375 void kvm_arch_pre_run(CPUState
*env
, struct kvm_run
*run
)
380 /* PowerPC Qemu tracks the various core input pins (interrupt, critical
381 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
382 if (!cap_interrupt_level
&&
383 run
->ready_for_interrupt_injection
&&
384 (env
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
385 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
387 /* For now KVM disregards the 'irq' argument. However, in the
388 * future KVM could cache it in-kernel to avoid a heavyweight exit
389 * when reading the UIC.
391 irq
= KVM_INTERRUPT_SET
;
393 dprintf("injected interrupt %d\n", irq
);
394 r
= kvm_vcpu_ioctl(env
, KVM_INTERRUPT
, &irq
);
396 printf("cpu %d fail inject %x\n", env
->cpu_index
, irq
);
398 /* Always wake up soon in case the interrupt was level based */
399 qemu_mod_timer(idle_timer
, qemu_get_clock_ns(vm_clock
) +
400 (get_ticks_per_sec() / 50));
403 /* We don't know if there are more interrupts pending after this. However,
404 * the guest will return to userspace in the course of handling this one
405 * anyways, so we will get a chance to deliver the rest. */
/*
 * Post-run arch hook, taking the vcpu state and its kvm_run structure.
 * NOTE(review): no statements of the body are visible in this view.
 */
408 void kvm_arch_post_run(CPUState
*env
, struct kvm_run
*run
)
/*
 * Arch hook for asynchronous event processing on a vcpu.
 * NOTE(review): no statements of the body are visible in this view.
 */
412 int kvm_arch_process_async_events(CPUState
*env
)
/*
 * Halt handling, called from kvm_arch_handle_exit(): when no hard interrupt
 * is pending (CPU_INTERRUPT_HARD clear in env->interrupt_request) and
 * msr_ee is set, env->exception_index is set to EXCP_HLT.
 */
417 static int kvmppc_handle_halt(CPUState
*env
)
419 if (!(env
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
421 env
->exception_index
= EXCP_HLT
;
/*
 * Read DCR number 'dcrn' into *data through ppc_dcr_read(), using
 * env->dcr_env. A negative result is reported on stderr.
 */
427 /* map dcr access to existing qemu dcr emulation */
428 static int kvmppc_handle_dcr_read(CPUState
*env
, uint32_t dcrn
, uint32_t *data
)
430 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
431 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
/*
 * Write 'data' to DCR number 'dcrn' through ppc_dcr_write(), using
 * env->dcr_env. A negative result is reported on stderr.
 */
436 static int kvmppc_handle_dcr_write(CPUState
*env
, uint32_t dcrn
, uint32_t data
)
438 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
439 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
/*
 * Dispatch on run->exit_reason.
 *
 * DCR accesses go to kvmppc_handle_dcr_write() or kvmppc_handle_dcr_read()
 * depending on run->dcr.is_write; halts go to kvmppc_handle_halt(). With
 * CONFIG_PSERIES, KVM_EXIT_PAPR_HCALL stores the result of
 * spapr_hypercall() in run->papr_hcall.ret. An unknown exit reason is
 * reported on stderr.
 */
444 int kvm_arch_handle_exit(CPUState
*env
, struct kvm_run
*run
)
448 switch (run
->exit_reason
) {
450 if (run
->dcr
.is_write
) {
451 dprintf("handle dcr write\n");
452 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
454 dprintf("handle dcr read\n");
455 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
459 dprintf("handle halt\n");
460 ret
= kvmppc_handle_halt(env
);
462 #ifdef CONFIG_PSERIES
463 case KVM_EXIT_PAPR_HCALL
:
464 dprintf("handle PAPR hypercall\n");
465 run
->papr_hcall
.ret
= spapr_hypercall(env
, run
->papr_hcall
.nr
,
466 run
->papr_hcall
.args
);
471 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
/*
 * Find the first line of /proc/cpuinfo that starts with 'field' and copy
 * that whole line (field name included) into 'value'.
 *
 * field: prefix matched against the start of each line
 * value: destination buffer; NUL-terminated on success whenever len > 0
 * len:   size of 'value' in bytes; longer lines are truncated to fit
 *
 * Returns 0 when a matching line was found, -1 when the file cannot be
 * opened or no line matches. Callers treat any non-zero result as failure.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    size_t field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    while (fgets(line, sizeof(line), f)) {
        if (strncmp(line, field, field_len) == 0) {
            /*
             * strncpy() would leave 'value' without a terminator when the
             * line is at least len bytes long; snprintf() always
             * terminates.
             */
            if (len > 0) {
                snprintf(value, (size_t)len, "%s", line);
            }
            ret = 0;
            break;
        }
    }

    fclose(f);

    return ret;
}
/*
 * Timebase frequency lookup.
 *
 * Starts with get_ticks_per_sec() as the default return value, fetches the
 * "timebase" line through read_cpuinfo() and locates the ':' separator in
 * it with strchr().
 * NOTE(review): the statements that parse the value after the ':' are not
 * visible in this view.
 */
507 uint32_t kvmppc_get_tbfreq(void)
511 uint32_t retval
= get_ticks_per_sec();
513 if (read_cpuinfo("timebase", line
, sizeof(line
))) {
517 if (!(ns
= strchr(line
, ':'))) {
/*
 * Scan the directory PROC_DEVTREE_CPU with opendir()/readdir(). For an
 * entry, buf is formatted first as the entry path followed by
 * /clock-frequency and, on a later line, as the bare entry path. A failed
 * opendir() and an empty buf after the scan are both reported with
 * printf().
 * NOTE(review): the check between the two snprintf() calls is not visible
 * in this view.
 */
527 /* Try to find a device tree node for a CPU with clock-frequency property */
528 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
533 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
534 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
539 while ((dirp
= readdir(dp
)) != NULL
) {
541 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
545 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
552 if (buf
[0] == '\0') {
553 printf("Unknown host!\n");
/*
 * Read the clock-frequency property of the CPU node located by
 * kvmppc_find_cpu_dt().
 *
 * The property is read as up to two 32-bit cells in host byte order: one
 * cell is returned as-is, two cells are returned as a single 64-bit value.
 *
 * Returns 0 when no CPU node is found or the property holds no complete
 * cell, and (uint64_t)-1 when the property file cannot be opened.
 */
uint64_t kvmppc_get_clockfreq(void)
{
    char buf[512];
    uint32_t tb[2];
    uint64_t freq;
    FILE *f;
    size_t cells;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return 0;
    }

    /*
     * strncat() appends a terminator on top of the given count, so one
     * byte of the remaining space has to be reserved for it.
     */
    strncat(buf, "/clock-frequency", sizeof(buf) - strlen(buf) - 1);

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    cells = fread(tb, sizeof(tb[0]), 2, f);
    fclose(f);

    switch (cells) {
    case 1:
        /* freq is only a single cell */
        return tb[0];
    case 2:
        /* memcpy avoids reading a uint32_t array through a uint64_t lvalue */
        memcpy(&freq, tb, sizeof(freq));
        return freq;
    default:
        return 0;
    }
}
/*
 * Fill buf with hypercall data.
 *
 * When KVM reports KVM_CAP_PPC_GET_PVINFO and the KVM_PPC_GET_PVINFO vm
 * ioctl returns 0, buf_len bytes of pvinfo.hcall are copied into buf.
 * Otherwise a fallback is written through hc, a uint32_t view of buf.
 * NOTE(review): the memcpy length is buf_len, not sizeof(pvinfo.hcall) --
 * confirm callers never pass a larger buffer length.
 * NOTE(review): the fallback stores are not visible in this view.
 */
591 int kvmppc_get_hypercall(CPUState
*env
, uint8_t *buf
, int buf_len
)
593 uint32_t *hc
= (uint32_t*)buf
;
595 struct kvm_ppc_pvinfo pvinfo
;
597 if (kvm_check_extension(env
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
598 !kvm_vm_ioctl(env
->kvm_state
, KVM_PPC_GET_PVINFO
, &pvinfo
)) {
599 memcpy(buf
, pvinfo
.hcall
, buf_len
);
605 * Fallback to always fail hypercalls:
/*
 * Request PAPR mode for this vcpu: a zeroed struct kvm_enable_cap gets its
 * cap field set to KVM_CAP_PPC_PAPR and is passed to the KVM_ENABLE_CAP
 * vcpu ioctl. cpu_abort() is called with a message saying that this KVM
 * version does not support PAPR.
 * NOTE(review): the condition guarding cpu_abort() is not visible in this
 * view; presumably it is the ioctl failing.
 */
621 void kvmppc_set_papr(CPUState
*env
)
623 struct kvm_enable_cap cap
;
626 memset(&cap
, 0, sizeof(cap
));
627 cap
.cap
= KVM_CAP_PPC_PAPR
;
628 ret
= kvm_vcpu_ioctl(env
, KVM_ENABLE_CAP
, &cap
);
635 * XXX We set HIOR here. It really should be a qdev property of
636 * the CPU node, but we don't have CPUs converted to qdev yet.
638 * Once we have qdev CPUs, move HIOR to a qdev property and
641 /* XXX Set HIOR using new ioctl */
646 cpu_abort(env
, "This KVM version does not support PAPR\n");
/*
 * Arch hook returning a bool for the given vcpu.
 * NOTE(review): no statements of the body are visible in this view.
 */
649 bool kvm_arch_stop_on_emulation_error(CPUState
*env
)
/*
 * Arch hook taking a vcpu, a signal code and an address.
 * NOTE(review): no statements of the body are visible in this view.
 */
654 int kvm_arch_on_sigbus_vcpu(CPUState
*env
, int code
, void *addr
)
/*
 * Arch hook taking a signal code and an address, with no vcpu argument.
 * NOTE(review): no statements of the body are visible in this view.
 */
659 int kvm_arch_on_sigbus(int code
, void *addr
)