/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <dirent.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu-timer.h"
#include "sysemu.h"
#include "kvm.h"
#include "kvm_ppc.h"
#include "cpu.h"
#include "device_tree.h"
#include "hw/sysbus.h"
#include "hw/spapr.h"
#include "hw/spapr_vio.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;

/* XXX We have a race condition where we actually have a level triggered
 * interrupt, but the infrastructure can't expose that yet, so the guest
 * takes but ignores it, goes to sleep and never gets notified that there's
 * still an interrupt pending.
 *
 * As a quick workaround, let's just wake up again 20 ms after we injected
 * an interrupt. That way we can assure that we're always reinjecting
 * interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_env(void *env)
{
    qemu_cpu_kick(env);
}

int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    return 0;
}
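
/* Illustrative note (not upstream code): the cap_* flags probed above gate
 * the optional paths in the rest of this file. A caller-side sketch, under
 * the same probing convention:
 *
 *     if (cap_spapr_tce) {
 *         table = kvmppc_create_spapr_tce(liobn, window_size, &fd);
 *     }
 *
 * kvmppc_create_spapr_tce() below performs this check internally as well. */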

static int kvm_arch_sync_sregs(CPUState *cenv)
{
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we might confuse users into thinking they can
           run BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(CPUState *env)
{
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    struct kvm_enable_cap encap = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    encap.cap = KVM_CAP_SW_TLB;
    encap.args[0] = (uintptr_t)&cfg;

    ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}
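
/* A short protocol sketch (illustrative, based on the setup above): once
 * KVM_CAP_SW_TLB is enabled, env->tlb.tlbm is an array shared with the
 * kernel. QEMU-side TLB updates only need to mark env->tlb_dirty; the
 * entries are then pushed with KVM_DIRTY_TLB from kvm_sw_tlb_put() (called
 * by kvm_arch_put_registers() below) before the vcpu re-enters the guest. */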

int kvm_arch_init_vcpu(CPUState *cenv)
{
    int ret;

    ret = kvm_arch_sync_sregs(cenv);
    if (ret) {
        return ret;
    }

    idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_env, cenv);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cenv);
        break;
    default:
        break;
    }

    return ret;
}

void kvm_arch_reset_vcpu(CPUState *env)
{
}

static void kvm_sw_tlb_put(CPUState *env)
{
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(env, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}

int kvm_arch_put_registers(CPUState *env, int level)
{
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = env->xer;
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(env);
        env->tlb_dirty = false;
    }

    return ret;
}

int kvm_arch_get_registers(CPUState *env)
{
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    env->xer = regs.xer;
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        ppc_store_sdr1(env, sregs.u.s.sdr1);

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
                          sregs.u.s.ppc64.slb[i].slbv);
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    return 0;
}

int kvmppc_set_interrupt(CPUState *env, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);

    return 0;
}
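
/* Illustrative mapping note (mirrors the ternary above, no extra upstream
 * logic): a non-zero 'level' raises the external interrupt line via
 * KVM_INTERRUPT_SET_LEVEL, while level == 0 lowers it via
 * KVM_INTERRUPT_UNSET, e.g.
 *
 *     kvmppc_set_interrupt(env, PPC_INTERRUPT_EXT, 1);   // assert
 *     kvmppc_set_interrupt(env, PPC_INTERRUPT_EXT, 0);   // deassert
 */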

#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif

void kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
{
    int r;
    unsigned irq;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1 << PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        dprintf("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", env->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
                       (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyway, so we will get a chance to deliver the rest. */
}

void kvm_arch_post_run(CPUState *env, struct kvm_run *run)
{
}

int kvm_arch_process_async_events(CPUState *env)
{
    return 0;
}

static int kvmppc_handle_halt(CPUState *env)
{
    if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        env->halted = 1;
        env->exception_index = EXCP_HLT;
    }

    return 0;
}

/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run)
{
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            dprintf("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            dprintf("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        dprintf("handle halt\n");
        ret = kvmppc_handle_halt(env);
        break;
#ifdef CONFIG_PSERIES
    case KVM_EXIT_PAPR_HCALL:
        dprintf("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(env, run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 1;
        break;
#endif
    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}

static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            strncpy(value, line, len);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}

uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}
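
/* Example of the /proc/cpuinfo line consumed above (format as found on
 * typical ppc hosts, shown for illustration):
 *
 *     timebase        : 512000000
 *
 * atoi() on the text after the ':' then yields the timebase frequency,
 * here 512000000 ticks per second. */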

/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}

uint64_t kvmppc_get_clockfreq(void)
{
    char buf[512];
    uint32_t tb[2];
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return 0;
    }

    /* Leave room for the terminating NUL that strncat() appends */
    strncat(buf, "/clock-frequency", sizeof(buf) - strlen(buf) - 1);

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    len = fread(tb, sizeof(tb[0]), 2, f);
    fclose(f);
    switch (len) {
    case 1:
        /* freq is only a single cell */
        return tb[0];
    case 2:
        return *(uint64_t *)tb;
    }

    return 0;
}
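
/* Endianness note (illustrative): device tree cells are big-endian 32-bit
 * quantities, so on the big-endian ppc hosts this code runs on, the
 * two-cell case can read the 64-bit frequency directly; e.g. cells
 * { 0x00000001, 0x2A05F200 } yield 0x12A05F200 = 5000000000 Hz. */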

int kvmppc_get_hypercall(CPUState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t *)buf;

    struct kvm_ppc_pvinfo pvinfo;

    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);

        return 0;
    }

    /*
     * Fallback to always fail hypercalls:
     *
     *     li r3, -1
     *     nop
     *     nop
     *     nop
     */

    hc[0] = 0x3860ffff;
    hc[1] = 0x60000000;
    hc[2] = 0x60000000;
    hc[3] = 0x60000000;

    return 0;
}
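
/* Decoding of the fallback sequence above (for illustration): 0x3860ffff
 * encodes "li r3, -1" (addi r3, 0, -1) and 0x60000000 encodes "nop"
 * (ori 0, 0, 0), so a guest executing this hypercall stub always gets a
 * -1 return value in r3. */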

void kvmppc_set_papr(CPUState *env)
{
    struct kvm_enable_cap cap = {};
    struct kvm_one_reg reg = {};
    struct kvm_sregs sregs = {};
    int ret;

    cap.cap = KVM_CAP_PPC_PAPR;
    ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);

    if (ret) {
        goto fail;
    }

    /*
     * XXX We set HIOR here. It really should be a qdev property of
     * the CPU node, but we don't have CPUs converted to qdev yet.
     *
     * Once we have qdev CPUs, move HIOR to a qdev property and
     * remove this chunk.
     */
    reg.id = KVM_ONE_REG_PPC_HIOR;
    reg.u.reg64 = env->spr[SPR_HIOR];
    ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
    if (ret) {
        goto fail;
    }

    /* Set SDR1 so kernel space finds the HTAB */
    ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
    if (ret) {
        goto fail;
    }

    sregs.u.s.sdr1 = env->spr[SPR_SDR1];

    ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
    if (ret) {
        goto fail;
    }

    return;

fail:
    cpu_abort(env, "This KVM version does not support PAPR\n");
}

int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}

off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     * not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        close(fd);
        return -1;
    }

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}
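
/* Caller-side sketch (illustrative; the sPAPR machine init does something
 * along these lines, and the variable names here are assumptions):
 *
 *     rma_size = kvmppc_alloc_rma("ppc_spapr.rma", sysmem);
 *     if (rma_size <= 0) {
 *         rma_size = ram_size;   // no contiguous RMA needed or available
 *     }
 */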

void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    if (!cap_spapr_tce) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        return NULL;
    }

    len = (window_size / SPAPR_VIO_TCE_PAGE_SIZE) * sizeof(VIOsPAPR_RTCE);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}
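
/* Usage sketch (illustrative, the local names are assumptions): a VIO
 * device would pair this with kvmppc_remove_spapr_tce() below for teardown:
 *
 *     int fd;
 *     void *table = kvmppc_create_spapr_tce(liobn, window_size, &fd);
 *     if (table) {
 *         ...
 *         kvmppc_remove_spapr_tce(table, fd, window_size);
 *     }
 */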

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = (window_size / SPAPR_VIO_TCE_PAGE_SIZE) * sizeof(VIOsPAPR_RTCE);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing KVM SPAPR TCE "
                "table: %s", strerror(errno));
        /* Leak the table */
    }

    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *env)
{
    return true;
}

int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr)
{
    return 1;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}