/*
 * target-ppc/kvm.c — from qemu.git
 * (snapshot around "pseries: Allow KVM Book3S-HV on PPC970 CPUS")
 */
1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21
22 #include <linux/kvm.h>
23
24 #include "qemu-common.h"
25 #include "qemu-timer.h"
26 #include "sysemu.h"
27 #include "kvm.h"
28 #include "kvm_ppc.h"
29 #include "cpu.h"
30 #include "device_tree.h"
31 #include "hw/spapr.h"
32
33 #include "hw/sysbus.h"
34 #include "hw/spapr.h"
35 #include "hw/spapr_vio.h"
36
37 //#define DEBUG_KVM
38
39 #ifdef DEBUG_KVM
40 #define dprintf(fmt, ...) \
41 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
42 #else
43 #define dprintf(fmt, ...) \
44 do { } while (0)
45 #endif
46
47 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
48
/* Capabilities this port strictly requires from the host kernel: none —
 * everything below is probed at init time and treated as optional. */
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

/* Host KVM capability flags, filled in once by kvm_arch_init(). */
static int cap_interrupt_unset = false; /* KVM_CAP_PPC_UNSET_IRQ */
static int cap_interrupt_level = false; /* KVM_CAP_PPC_IRQ_LEVEL */
static int cap_segstate;                /* KVM_CAP_PPC_SEGSTATE (Book S sregs) */
static int cap_booke_sregs;             /* KVM_CAP_PPC_BOOKE_SREGS */
static int cap_ppc_smt;                 /* KVM_CAP_PPC_SMT: threads per core */
static int cap_ppc_rma;                 /* KVM_CAP_PPC_RMA: see kvmppc_alloc_rma() */

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;
70
71 static void kvm_kick_env(void *env)
72 {
73 qemu_cpu_kick(env);
74 }
75
76 int kvm_arch_init(KVMState *s)
77 {
78 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
79 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
80 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
81 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
82 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
83 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
84
85 if (!cap_interrupt_level) {
86 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
87 "VM to stall at times!\n");
88 }
89
90 return 0;
91 }
92
93 static int kvm_arch_sync_sregs(CPUState *cenv)
94 {
95 struct kvm_sregs sregs;
96 int ret;
97
98 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
99 /* What we're really trying to say is "if we're on BookE, we use
100 the native PVR for now". This is the only sane way to check
101 it though, so we potentially confuse users that they can run
102 BookE guests on BookS. Let's hope nobody dares enough :) */
103 return 0;
104 } else {
105 if (!cap_segstate) {
106 fprintf(stderr, "kvm error: missing PVR setting capability\n");
107 return -ENOSYS;
108 }
109 }
110
111 ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
112 if (ret) {
113 return ret;
114 }
115
116 sregs.pvr = cenv->spr[SPR_PVR];
117 return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
118 }
119
/* Set up a shared TLB array with KVM.
 *
 * Hands QEMU's BookE 2.06 MAS-format TLB array (env->tlb.tlbm) to the kernel
 * via KVM_ENABLE_CAP/KVM_CAP_SW_TLB so both sides operate on the same
 * storage.  Returns 0 on success (or when the capability is absent, in which
 * case nothing is shared) and a negative errno on ioctl failure. */
static int kvm_booke206_tlb_init(CPUState *env)
{
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    struct kvm_enable_cap encap = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    /* Describe each TLB's geometry to the kernel and total up the number
     * of entries the shared array must hold. */
    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    /* The kernel and QEMU must agree on the array layout byte-for-byte. */
    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    /* Force a full flush to the kernel on the next register sync. */
    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    encap.cap = KVM_CAP_SW_TLB;
    encap.args[0] = (uintptr_t)&cfg;

    ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}
165
166 int kvm_arch_init_vcpu(CPUState *cenv)
167 {
168 int ret;
169
170 ret = kvm_arch_sync_sregs(cenv);
171 if (ret) {
172 return ret;
173 }
174
175 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_env, cenv);
176
177 /* Some targets support access to KVM's guest TLB. */
178 switch (cenv->mmu_model) {
179 case POWERPC_MMU_BOOKE206:
180 ret = kvm_booke206_tlb_init(cenv);
181 break;
182 default:
183 break;
184 }
185
186 return ret;
187 }
188
/* Nothing to do on vcpu reset for PPC (reset state is handled elsewhere). */
void kvm_arch_reset_vcpu(CPUState *env)
{
}
192
193 static void kvm_sw_tlb_put(CPUState *env)
194 {
195 struct kvm_dirty_tlb dirty_tlb;
196 unsigned char *bitmap;
197 int ret;
198
199 if (!env->kvm_sw_tlb) {
200 return;
201 }
202
203 bitmap = g_malloc((env->nb_tlb + 7) / 8);
204 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
205
206 dirty_tlb.bitmap = (uintptr_t)bitmap;
207 dirty_tlb.num_dirty = env->nb_tlb;
208
209 ret = kvm_vcpu_ioctl(env, KVM_DIRTY_TLB, &dirty_tlb);
210 if (ret) {
211 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
212 __func__, strerror(-ret));
213 }
214
215 g_free(bitmap);
216 }
217
218 int kvm_arch_put_registers(CPUState *env, int level)
219 {
220 struct kvm_regs regs;
221 int ret;
222 int i;
223
224 ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
225 if (ret < 0)
226 return ret;
227
228 regs.ctr = env->ctr;
229 regs.lr = env->lr;
230 regs.xer = env->xer;
231 regs.msr = env->msr;
232 regs.pc = env->nip;
233
234 regs.srr0 = env->spr[SPR_SRR0];
235 regs.srr1 = env->spr[SPR_SRR1];
236
237 regs.sprg0 = env->spr[SPR_SPRG0];
238 regs.sprg1 = env->spr[SPR_SPRG1];
239 regs.sprg2 = env->spr[SPR_SPRG2];
240 regs.sprg3 = env->spr[SPR_SPRG3];
241 regs.sprg4 = env->spr[SPR_SPRG4];
242 regs.sprg5 = env->spr[SPR_SPRG5];
243 regs.sprg6 = env->spr[SPR_SPRG6];
244 regs.sprg7 = env->spr[SPR_SPRG7];
245
246 regs.pid = env->spr[SPR_BOOKE_PID];
247
248 for (i = 0;i < 32; i++)
249 regs.gpr[i] = env->gpr[i];
250
251 ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
252 if (ret < 0)
253 return ret;
254
255 if (env->tlb_dirty) {
256 kvm_sw_tlb_put(env);
257 env->tlb_dirty = false;
258 }
259
260 return ret;
261 }
262
/* Fetch the vcpu state from the kernel into env: GPRs, CR, the common
 * control SPRs, then — depending on probed capabilities — either the BookE
 * extended sregs (feature-flag gated) or the Book S MMU state (SDR1, SLB,
 * segment registers, BATs).  Returns 0 on success or a negative errno. */
int kvm_arch_get_registers(CPUState *env)
{
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
    if (ret < 0)
        return ret;

    /* Split the 32-bit CR into eight 4-bit fields; crf[0] gets the most
     * significant nibble. */
    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    env->xer = regs.xer;
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0;i < 32; i++)
        env->gpr[i] = regs.gpr[i];

    /* BookE: the kernel advertises which sregs groups are valid through
     * the features bitmask; only copy what it actually provides. */
    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            /* The 64-bit timebase is exposed as the TBL/TBU pair. */
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        /* Interrupt vector offset registers. */
        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            /* MAS7 and MAS3 are packed into one 64-bit field. */
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        /* Freescale implementation-specific registers. */
        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    /* Book S: sync the hash-MMU state. */
    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        ppc_store_sdr1(env, sregs.u.s.sdr1);

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
                          sregs.u.s.ppc64.slb[i].slbv);
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs: each 64-bit kernel value packs the upper/lower BAT. */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    return 0;
}
439
440 int kvmppc_set_interrupt(CPUState *env, int irq, int level)
441 {
442 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
443
444 if (irq != PPC_INTERRUPT_EXT) {
445 return 0;
446 }
447
448 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
449 return 0;
450 }
451
452 kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);
453
454 return 0;
455 }
456
457 #if defined(TARGET_PPCEMB)
458 #define PPC_INPUT_INT PPC40x_INPUT_INT
459 #elif defined(TARGET_PPC64)
460 #define PPC_INPUT_INT PPC970_INPUT_INT
461 #else
462 #define PPC_INPUT_INT PPC6xx_INPUT_INT
463 #endif
464
465 void kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
466 {
467 int r;
468 unsigned irq;
469
470 /* PowerPC Qemu tracks the various core input pins (interrupt, critical
471 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
472 if (!cap_interrupt_level &&
473 run->ready_for_interrupt_injection &&
474 (env->interrupt_request & CPU_INTERRUPT_HARD) &&
475 (env->irq_input_state & (1<<PPC_INPUT_INT)))
476 {
477 /* For now KVM disregards the 'irq' argument. However, in the
478 * future KVM could cache it in-kernel to avoid a heavyweight exit
479 * when reading the UIC.
480 */
481 irq = KVM_INTERRUPT_SET;
482
483 dprintf("injected interrupt %d\n", irq);
484 r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
485 if (r < 0)
486 printf("cpu %d fail inject %x\n", env->cpu_index, irq);
487
488 /* Always wake up soon in case the interrupt was level based */
489 qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
490 (get_ticks_per_sec() / 50));
491 }
492
493 /* We don't know if there are more interrupts pending after this. However,
494 * the guest will return to userspace in the course of handling this one
495 * anyways, so we will get a chance to deliver the rest. */
496 }
497
/* No post-run fixups needed on PPC. */
void kvm_arch_post_run(CPUState *env, struct kvm_run *run)
{
}
501
/* No PPC-specific async events to process; 0 tells the generic loop to
 * keep running the vcpu. */
int kvm_arch_process_async_events(CPUState *env)
{
    return 0;
}
506
507 static int kvmppc_handle_halt(CPUState *env)
508 {
509 if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
510 env->halted = 1;
511 env->exception_index = EXCP_HLT;
512 }
513
514 return 0;
515 }
516
517 /* map dcr access to existing qemu dcr emulation */
518 static int kvmppc_handle_dcr_read(CPUState *env, uint32_t dcrn, uint32_t *data)
519 {
520 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
521 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
522
523 return 0;
524 }
525
526 static int kvmppc_handle_dcr_write(CPUState *env, uint32_t dcrn, uint32_t data)
527 {
528 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
529 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
530
531 return 0;
532 }
533
/* Dispatch a KVM exit back into QEMU's device emulation: DCR accesses,
 * halts, and (with pSeries support built in) PAPR hypercalls.  Unknown
 * exit reasons produce a negative return. */
int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run)
{
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            dprintf("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            dprintf("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        dprintf("handle halt\n");
        ret = kvmppc_handle_halt(env);
        break;
#ifdef CONFIG_PSERIES
    case KVM_EXIT_PAPR_HCALL:
        /* The hypercall result is passed back to the guest through the
         * shared kvm_run structure. */
        dprintf("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(env, run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 1;
        break;
#endif
    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}
568
/* Scan /proc/cpuinfo for a line starting with `field` and copy it (always
 * NUL-terminated) into `value`, which has room for `len` bytes.
 * Returns 0 when the field was found, -1 otherwise. */
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            /* strncpy() leaves the destination unterminated when the
             * source is len bytes or longer — terminate explicitly. */
            strncpy(value, line, len);
            if (len > 0) {
                value[len - 1] = '\0';
            }
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}
596
/* Return the host timebase frequency, parsed from the "timebase" line of
 * /proc/cpuinfo; falls back to get_ticks_per_sec() if the line is missing
 * or malformed. */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *colon;
    uint32_t freq = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return freq;
    }

    colon = strchr(line, ':');
    if (!colon) {
        return freq;
    }

    /* The frequency follows the colon: "timebase : 512000000". */
    return atoi(colon + 1);
}
616
617 /* Try to find a device tree node for a CPU with clock-frequency property */
618 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
619 {
620 struct dirent *dirp;
621 DIR *dp;
622
623 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
624 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
625 return -1;
626 }
627
628 buf[0] = '\0';
629 while ((dirp = readdir(dp)) != NULL) {
630 FILE *f;
631 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
632 dirp->d_name);
633 f = fopen(buf, "r");
634 if (f) {
635 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
636 fclose(f);
637 break;
638 }
639 buf[0] = '\0';
640 }
641 closedir(dp);
642 if (buf[0] == '\0') {
643 printf("Unknown host!\n");
644 return -1;
645 }
646
647 return 0;
648 }
649
/* Read the CPU clock frequency from the host device tree.  The property is
 * either one 32-bit cell or two cells forming a 64-bit value.  Returns 0
 * when no suitable node exists, (uint64_t)-1 when the property file cannot
 * be opened. */
uint64_t kvmppc_get_clockfreq(void)
{
    char buf[512];
    uint32_t tb[2];
    uint64_t freq;
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return 0;
    }

    /* Reserve a byte for the NUL terminator; the previous bound of
     * sizeof(buf) - strlen(buf) could overflow the buffer by one byte. */
    strncat(buf, "/clock-frequency", sizeof(buf) - strlen(buf) - 1);

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    len = fread(tb, sizeof(tb[0]), 2, f);
    fclose(f);
    switch (len) {
    case 1:
        /* freq is only a single cell */
        return tb[0];
    case 2:
        /* Two cells: reassemble via memcpy instead of the old
         * *(uint64_t *)tb cast, which violated strict aliasing. */
        memcpy(&freq, tb, sizeof(freq));
        return freq;
    }

    return 0;
}
680
681 int kvmppc_get_hypercall(CPUState *env, uint8_t *buf, int buf_len)
682 {
683 uint32_t *hc = (uint32_t*)buf;
684
685 struct kvm_ppc_pvinfo pvinfo;
686
687 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
688 !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
689 memcpy(buf, pvinfo.hcall, buf_len);
690
691 return 0;
692 }
693
694 /*
695 * Fallback to always fail hypercalls:
696 *
697 * li r3, -1
698 * nop
699 * nop
700 * nop
701 */
702
703 hc[0] = 0x3860ffff;
704 hc[1] = 0x60000000;
705 hc[2] = 0x60000000;
706 hc[3] = 0x60000000;
707
708 return 0;
709 }
710
/* Put this vcpu into PAPR (pSeries) mode: enable KVM_CAP_PPC_PAPR, set
 * HIOR, and point the kernel at the guest HTAB via SDR1.  Aborts the VM
 * if any step fails, since that means the host KVM cannot run PAPR
 * guests at all. */
void kvmppc_set_papr(CPUState *env)
{
    struct kvm_enable_cap cap = {};
    struct kvm_one_reg reg = {};
    struct kvm_sregs sregs = {};
    int ret;

    cap.cap = KVM_CAP_PPC_PAPR;
    ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);

    if (ret) {
        goto fail;
    }

    /*
     * XXX We set HIOR here. It really should be a qdev property of
     *     the CPU node, but we don't have CPUs converted to qdev yet.
     *
     *     Once we have qdev CPUs, move HIOR to a qdev property and
     *     remove this chunk.
     */
    reg.id = KVM_ONE_REG_PPC_HIOR;
    reg.u.reg64 = env->spr[SPR_HIOR];
    ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
    if (ret) {
        goto fail;
    }

    /* Set SDR1 so kernel space finds the HTAB */
    ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
    if (ret) {
        goto fail;
    }

    sregs.u.s.sdr1 = env->spr[SPR_SDR1];

    ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
    if (ret) {
        goto fail;
    }

    return;

fail:
    cpu_abort(env, "This KVM version does not support PAPR\n");
}
757
758 int kvmppc_smt_threads(void)
759 {
760 return cap_ppc_smt ? cap_ppc_smt : 1;
761 }
762
/* Allocate a contiguous Real Mode Area from the kernel (KVM_ALLOCATE_RMA),
 * mmap it, and map it at guest physical address 0 as a RAM region named
 * `name`.  Returns the RMA size in bytes, 0 when no contiguous RMA is
 * required on this host, or -1 on error. */
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    /* The ioctl returns an fd backing the RMA memory. */
    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    /* Cap the mapped RMA at 256 MiB even if the kernel offers more. */
    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    };

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}
804
/* Always stop the vcpu when KVM fails to emulate an instruction, so the
 * failure is reported rather than silently skipped. */
bool kvm_arch_stop_on_emulation_error(CPUState *env)
{
    return true;
}
809
/* SIGBUS (e.g. hardware memory error) handling is not implemented for PPC;
 * the non-zero return tells the generic layer the signal was not consumed. */
int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr)
{
    return 1;
}
814
/* Non-vcpu SIGBUS handling is likewise unimplemented; see
 * kvm_arch_on_sigbus_vcpu(). */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}