1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
22
23 #include <linux/kvm.h>
24
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
34
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "hw/ppc/ppc.h"
39 #include "sysemu/watchdog.h"
40 #include "trace.h"
41
42 //#define DEBUG_KVM
43
44 #ifdef DEBUG_KVM
45 #define DPRINTF(fmt, ...) \
46 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
47 #else
48 #define DPRINTF(fmt, ...) \
49 do { } while (0)
50 #endif
51
52 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
53
54 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
55 KVM_CAP_LAST_INFO
56 };
57
58 static int cap_interrupt_unset = false;
59 static int cap_interrupt_level = false;
60 static int cap_segstate;
61 static int cap_booke_sregs;
62 static int cap_ppc_smt;
63 static int cap_ppc_rma;
64 static int cap_spapr_tce;
65 static int cap_spapr_multitce;
66 static int cap_spapr_vfio;
67 static int cap_hior;
68 static int cap_one_reg;
69 static int cap_epr;
70 static int cap_ppc_watchdog;
71 static int cap_papr;
72 static int cap_htab_fd;
73 static int cap_fixup_hcalls;
74
75 static uint32_t debug_inst_opcode;
76
77 /* XXX We have a race condition where we actually have a level triggered
78 * interrupt, but the infrastructure can't expose that yet, so the guest
79 * takes but ignores it, goes to sleep and never gets notified that there's
80 * still an interrupt pending.
81 *
82 * As a quick workaround, let's just wake up again 20 ms after we injected
83  * an interrupt. That way we can ensure that we're always reinjecting
84 * interrupts in case the guest swallowed them.
85 */
86 static QEMUTimer *idle_timer;
87
88 static void kvm_kick_cpu(void *opaque)
89 {
90 PowerPCCPU *cpu = opaque;
91
92 qemu_cpu_kick(CPU(cpu));
93 }
94
95 static int kvm_ppc_register_host_cpu_type(void);
96
97 int kvm_arch_init(KVMState *s)
98 {
99 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
100 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
101 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
102 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
103 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
104 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
105 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
106 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
107 cap_spapr_vfio = false;
108 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
109 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
110 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
111 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
112     /* Note: we don't set cap_papr here, because this capability is
113      * only activated later, by kvmppc_set_papr() */
114 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
115 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
116
117 if (!cap_interrupt_level) {
118 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
119 "VM to stall at times!\n");
120 }
121
122 kvm_ppc_register_host_cpu_type();
123
124 return 0;
125 }
126
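/* Tell KVM which PVR the guest CPU reports: BookE keeps the host PVR,
 * while Book3S needs the segstate capability to set it via KVM_SET_SREGS */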
127 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
128 {
129 CPUPPCState *cenv = &cpu->env;
130 CPUState *cs = CPU(cpu);
131 struct kvm_sregs sregs;
132 int ret;
133
134 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
135         /* What we're really trying to say is "if we're on BookE, we use
136            the native PVR for now". This is the only sane way to check
137            it though, so we may mislead users into thinking they can run
138            BookE guests on BookS. Let's hope nobody dares to try :) */
139 return 0;
140 } else {
141 if (!cap_segstate) {
142 fprintf(stderr, "kvm error: missing PVR setting capability\n");
143 return -ENOSYS;
144 }
145 }
146
147 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
148 if (ret) {
149 return ret;
150 }
151
152 sregs.pvr = cenv->spr[SPR_PVR];
153 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
154 }
155
156 /* Set up a shared TLB array with KVM */
157 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
158 {
159 CPUPPCState *env = &cpu->env;
160 CPUState *cs = CPU(cpu);
161 struct kvm_book3e_206_tlb_params params = {};
162 struct kvm_config_tlb cfg = {};
163 unsigned int entries = 0;
164 int ret, i;
165
166 if (!kvm_enabled() ||
167 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
168 return 0;
169 }
170
171 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
172
173 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
174 params.tlb_sizes[i] = booke206_tlb_size(env, i);
175 params.tlb_ways[i] = booke206_tlb_ways(env, i);
176 entries += params.tlb_sizes[i];
177 }
178
179 assert(entries == env->nb_tlb);
180 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
181
182 env->tlb_dirty = true;
183
184 cfg.array = (uintptr_t)env->tlb.tlbm;
185 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
186 cfg.params = (uintptr_t)&params;
187 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
188
189 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
190 if (ret < 0) {
191 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
192 __func__, strerror(-ret));
193 return ret;
194 }
195
196 env->kvm_sw_tlb = true;
197 return 0;
198 }
199
200
201 #if defined(TARGET_PPC64)
202 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
203 struct kvm_ppc_smmu_info *info)
204 {
205 CPUPPCState *env = &cpu->env;
206 CPUState *cs = CPU(cpu);
207
208 memset(info, 0, sizeof(*info));
209
210     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
211      * need to "guess" what the supported page sizes are.
212 *
213 * For that to work we make a few assumptions:
214 *
215 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
216 * KVM which only supports 4K and 16M pages, but supports them
217      *   regardless of the backing store characteristics. We also don't
218 * support 1T segments.
219 *
220      *   This is safe: if HV KVM ever supports that capability, or PR
221      *   KVM grows support for more page/segment sizes, those versions
222 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
223 * will not hit this fallback
224 *
225 * - Else we are running HV KVM. This means we only support page
226 * sizes that fit in the backing store. Additionally we only
227      *   advertise 64K pages if the processor is ARCH 2.06 and we assume
228 * P7 encodings for the SLB and hash table. Here too, we assume
229 * support for any newer processor will mean a kernel that
230 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
231 * this fallback.
232 */
233 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
234 /* No flags */
235 info->flags = 0;
236 info->slb_size = 64;
237
238 /* Standard 4k base page size segment */
239 info->sps[0].page_shift = 12;
240 info->sps[0].slb_enc = 0;
241 info->sps[0].enc[0].page_shift = 12;
242 info->sps[0].enc[0].pte_enc = 0;
243
244 /* Standard 16M large page size segment */
245 info->sps[1].page_shift = 24;
246 info->sps[1].slb_enc = SLB_VSID_L;
247 info->sps[1].enc[0].page_shift = 24;
248 info->sps[1].enc[0].pte_enc = 0;
249 } else {
250 int i = 0;
251
252 /* HV KVM has backing store size restrictions */
253 info->flags = KVM_PPC_PAGE_SIZES_REAL;
254
255 if (env->mmu_model & POWERPC_MMU_1TSEG) {
256 info->flags |= KVM_PPC_1T_SEGMENTS;
257 }
258
259 if (env->mmu_model == POWERPC_MMU_2_06) {
260 info->slb_size = 32;
261 } else {
262 info->slb_size = 64;
263 }
264
265 /* Standard 4k base page size segment */
266 info->sps[i].page_shift = 12;
267 info->sps[i].slb_enc = 0;
268 info->sps[i].enc[0].page_shift = 12;
269 info->sps[i].enc[0].pte_enc = 0;
270 i++;
271
272 /* 64K on MMU 2.06 */
273 if (env->mmu_model == POWERPC_MMU_2_06) {
274 info->sps[i].page_shift = 16;
275 info->sps[i].slb_enc = 0x110;
276 info->sps[i].enc[0].page_shift = 16;
277 info->sps[i].enc[0].pte_enc = 1;
278 i++;
279 }
280
281 /* Standard 16M large page size segment */
282 info->sps[i].page_shift = 24;
283 info->sps[i].slb_enc = SLB_VSID_L;
284 info->sps[i].enc[0].page_shift = 24;
285 info->sps[i].enc[0].pte_enc = 0;
286 }
287 }
288
289 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
290 {
291 CPUState *cs = CPU(cpu);
292 int ret;
293
294 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
295 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
296 if (ret == 0) {
297 return;
298 }
299 }
300
301 kvm_get_fallback_smmu_info(cpu, info);
302 }
303
304 static long getrampagesize(void)
305 {
306 struct statfs fs;
307 int ret;
308
309 if (!mem_path) {
310 /* guest RAM is backed by normal anonymous pages */
311 return getpagesize();
312 }
313
314 do {
315 ret = statfs(mem_path, &fs);
316 } while (ret != 0 && errno == EINTR);
317
318 if (ret != 0) {
319 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
320 strerror(errno));
321 exit(1);
322 }
323
324 #define HUGETLBFS_MAGIC 0x958458f6
325
326 if (fs.f_type != HUGETLBFS_MAGIC) {
327 /* Explicit mempath, but it's ordinary pages */
328 return getpagesize();
329 }
330
331     /* It's hugetlbfs, so return the huge page size */
332 return fs.f_bsize;
333 }
334
335 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
336 {
337 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
338 return true;
339 }
340
341 return (1ul << shift) <= rampgsize;
342 }
343
344 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
345 {
346 static struct kvm_ppc_smmu_info smmu_info;
347 static bool has_smmu_info;
348 CPUPPCState *env = &cpu->env;
349 long rampagesize;
350 int iq, ik, jq, jk;
351
352 /* We only handle page sizes for 64-bit server guests for now */
353 if (!(env->mmu_model & POWERPC_MMU_64)) {
354 return;
355 }
356
357     /* Collect MMU info from the kernel if we haven't done so already */
358 if (!has_smmu_info) {
359 kvm_get_smmu_info(cpu, &smmu_info);
360 has_smmu_info = true;
361 }
362
363 rampagesize = getrampagesize();
364
365 /* Convert to QEMU form */
366 memset(&env->sps, 0, sizeof(env->sps));
367
368 /*
369 * XXX This loop should be an entry wide AND of the capabilities that
370 * the selected CPU has with the capabilities that KVM supports.
371 */
372 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
373 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
374 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
375
376 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
377 ksps->page_shift)) {
378 continue;
379 }
380 qsps->page_shift = ksps->page_shift;
381 qsps->slb_enc = ksps->slb_enc;
382 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
383 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
384 ksps->enc[jk].page_shift)) {
385 continue;
386 }
387 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
388 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
389 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
390 break;
391 }
392 }
393 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
394 break;
395 }
396 }
397 env->slb_nr = smmu_info.slb_size;
398 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
399 env->mmu_model &= ~POWERPC_MMU_1TSEG;
400 }
401 }
402 #else /* !defined (TARGET_PPC64) */
403
404 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
405 {
406 }
407
408 #endif /* defined (TARGET_PPC64) */
409
410 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
411 {
412 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
413 }
414
415 int kvm_arch_init_vcpu(CPUState *cs)
416 {
417 PowerPCCPU *cpu = POWERPC_CPU(cs);
418 CPUPPCState *cenv = &cpu->env;
419 int ret;
420
421 /* Gather server mmu info from KVM and update the CPU state */
422 kvm_fixup_page_sizes(cpu);
423
424 /* Synchronize sregs with kvm */
425 ret = kvm_arch_sync_sregs(cpu);
426 if (ret) {
427 return ret;
428 }
429
430 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
431
432 /* Some targets support access to KVM's guest TLB. */
433 switch (cenv->mmu_model) {
434 case POWERPC_MMU_BOOKE206:
435 ret = kvm_booke206_tlb_init(cpu);
436 break;
437 default:
438 break;
439 }
440
441 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
442
443 return ret;
444 }
445
446 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
447 {
448 CPUPPCState *env = &cpu->env;
449 CPUState *cs = CPU(cpu);
450 struct kvm_dirty_tlb dirty_tlb;
451 unsigned char *bitmap;
452 int ret;
453
454 if (!env->kvm_sw_tlb) {
455 return;
456 }
457
458 bitmap = g_malloc((env->nb_tlb + 7) / 8);
459 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
460
461 dirty_tlb.bitmap = (uintptr_t)bitmap;
462 dirty_tlb.num_dirty = env->nb_tlb;
463
464 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
465 if (ret) {
466 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
467 __func__, strerror(-ret));
468 }
469
470 g_free(bitmap);
471 }
472
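/* Read a single SPR from KVM with KVM_GET_ONE_REG and mirror it into
 * env->spr[]; the register width is taken from the ONE_REG id */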
473 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
474 {
475 PowerPCCPU *cpu = POWERPC_CPU(cs);
476 CPUPPCState *env = &cpu->env;
477 union {
478 uint32_t u32;
479 uint64_t u64;
480 } val;
481 struct kvm_one_reg reg = {
482 .id = id,
483 .addr = (uintptr_t) &val,
484 };
485 int ret;
486
487 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
488 if (ret != 0) {
489 trace_kvm_failed_spr_get(spr, strerror(errno));
490 } else {
491 switch (id & KVM_REG_SIZE_MASK) {
492 case KVM_REG_SIZE_U32:
493 env->spr[spr] = val.u32;
494 break;
495
496 case KVM_REG_SIZE_U64:
497 env->spr[spr] = val.u64;
498 break;
499
500 default:
501 /* Don't handle this size yet */
502 abort();
503 }
504 }
505 }
506
507 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
508 {
509 PowerPCCPU *cpu = POWERPC_CPU(cs);
510 CPUPPCState *env = &cpu->env;
511 union {
512 uint32_t u32;
513 uint64_t u64;
514 } val;
515 struct kvm_one_reg reg = {
516 .id = id,
517 .addr = (uintptr_t) &val,
518 };
519 int ret;
520
521 switch (id & KVM_REG_SIZE_MASK) {
522 case KVM_REG_SIZE_U32:
523 val.u32 = env->spr[spr];
524 break;
525
526 case KVM_REG_SIZE_U64:
527 val.u64 = env->spr[spr];
528 break;
529
530 default:
531 /* Don't handle this size yet */
532 abort();
533 }
534
535 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
536 if (ret != 0) {
537 trace_kvm_failed_spr_set(spr, strerror(errno));
538 }
539 }
540
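/* Push FPSCR, the FPR/VSR pairs and (if available) the Altivec state to
 * KVM, one KVM_SET_ONE_REG call per register */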
541 static int kvm_put_fp(CPUState *cs)
542 {
543 PowerPCCPU *cpu = POWERPC_CPU(cs);
544 CPUPPCState *env = &cpu->env;
545 struct kvm_one_reg reg;
546 int i;
547 int ret;
548
549 if (env->insns_flags & PPC_FLOAT) {
550 uint64_t fpscr = env->fpscr;
551 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
552
553 reg.id = KVM_REG_PPC_FPSCR;
554 reg.addr = (uintptr_t)&fpscr;
555 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
556 if (ret < 0) {
557 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
558 return ret;
559 }
560
561 for (i = 0; i < 32; i++) {
562 uint64_t vsr[2];
563
564 vsr[0] = float64_val(env->fpr[i]);
565 vsr[1] = env->vsr[i];
566 reg.addr = (uintptr_t) &vsr;
567 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
568
569 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
570 if (ret < 0) {
571 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
572 i, strerror(errno));
573 return ret;
574 }
575 }
576 }
577
578 if (env->insns_flags & PPC_ALTIVEC) {
579 reg.id = KVM_REG_PPC_VSCR;
580 reg.addr = (uintptr_t)&env->vscr;
581 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
582 if (ret < 0) {
583 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
584 return ret;
585 }
586
587 for (i = 0; i < 32; i++) {
588 reg.id = KVM_REG_PPC_VR(i);
589 reg.addr = (uintptr_t)&env->avr[i];
590 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
591 if (ret < 0) {
592 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
593 return ret;
594 }
595 }
596 }
597
598 return 0;
599 }
600
601 static int kvm_get_fp(CPUState *cs)
602 {
603 PowerPCCPU *cpu = POWERPC_CPU(cs);
604 CPUPPCState *env = &cpu->env;
605 struct kvm_one_reg reg;
606 int i;
607 int ret;
608
609 if (env->insns_flags & PPC_FLOAT) {
610 uint64_t fpscr;
611 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
612
613 reg.id = KVM_REG_PPC_FPSCR;
614 reg.addr = (uintptr_t)&fpscr;
615 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
616 if (ret < 0) {
617 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
618 return ret;
619 } else {
620 env->fpscr = fpscr;
621 }
622
623 for (i = 0; i < 32; i++) {
624 uint64_t vsr[2];
625
626 reg.addr = (uintptr_t) &vsr;
627 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
628
629 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
630 if (ret < 0) {
631 DPRINTF("Unable to get %s%d from KVM: %s\n",
632 vsx ? "VSR" : "FPR", i, strerror(errno));
633 return ret;
634 } else {
635 env->fpr[i] = vsr[0];
636 if (vsx) {
637 env->vsr[i] = vsr[1];
638 }
639 }
640 }
641 }
642
643 if (env->insns_flags & PPC_ALTIVEC) {
644 reg.id = KVM_REG_PPC_VSCR;
645 reg.addr = (uintptr_t)&env->vscr;
646 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
647 if (ret < 0) {
648 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
649 return ret;
650 }
651
652 for (i = 0; i < 32; i++) {
653 reg.id = KVM_REG_PPC_VR(i);
654 reg.addr = (uintptr_t)&env->avr[i];
655 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
656 if (ret < 0) {
657 DPRINTF("Unable to get VR%d from KVM: %s\n",
658 i, strerror(errno));
659 return ret;
660 }
661 }
662 }
663
664 return 0;
665 }
666
667 #if defined(TARGET_PPC64)
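/* Fetch the addresses of the VPA, SLB shadow buffer and dispatch trace log
 * that the guest registered, so QEMU's copy of that state stays in sync */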
668 static int kvm_get_vpa(CPUState *cs)
669 {
670 PowerPCCPU *cpu = POWERPC_CPU(cs);
671 CPUPPCState *env = &cpu->env;
672 struct kvm_one_reg reg;
673 int ret;
674
675 reg.id = KVM_REG_PPC_VPA_ADDR;
676 reg.addr = (uintptr_t)&env->vpa_addr;
677 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
678 if (ret < 0) {
679 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
680 return ret;
681 }
682
683 assert((uintptr_t)&env->slb_shadow_size
684 == ((uintptr_t)&env->slb_shadow_addr + 8));
685 reg.id = KVM_REG_PPC_VPA_SLB;
686 reg.addr = (uintptr_t)&env->slb_shadow_addr;
687 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
688 if (ret < 0) {
689 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
690 strerror(errno));
691 return ret;
692 }
693
694 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
695 reg.id = KVM_REG_PPC_VPA_DTL;
696 reg.addr = (uintptr_t)&env->dtl_addr;
697 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
698 if (ret < 0) {
699 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
700 strerror(errno));
701 return ret;
702 }
703
704 return 0;
705 }
706
707 static int kvm_put_vpa(CPUState *cs)
708 {
709 PowerPCCPU *cpu = POWERPC_CPU(cs);
710 CPUPPCState *env = &cpu->env;
711 struct kvm_one_reg reg;
712 int ret;
713
714 /* SLB shadow or DTL can't be registered unless a master VPA is
715 * registered. That means when restoring state, if a VPA *is*
716 * registered, we need to set that up first. If not, we need to
717 * deregister the others before deregistering the master VPA */
718 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
719
720 if (env->vpa_addr) {
721 reg.id = KVM_REG_PPC_VPA_ADDR;
722 reg.addr = (uintptr_t)&env->vpa_addr;
723 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
724 if (ret < 0) {
725 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
726 return ret;
727 }
728 }
729
730 assert((uintptr_t)&env->slb_shadow_size
731 == ((uintptr_t)&env->slb_shadow_addr + 8));
732 reg.id = KVM_REG_PPC_VPA_SLB;
733 reg.addr = (uintptr_t)&env->slb_shadow_addr;
734 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
735 if (ret < 0) {
736 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
737 return ret;
738 }
739
740 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
741 reg.id = KVM_REG_PPC_VPA_DTL;
742 reg.addr = (uintptr_t)&env->dtl_addr;
743 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
744 if (ret < 0) {
745 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
746 strerror(errno));
747 return ret;
748 }
749
750 if (!env->vpa_addr) {
751 reg.id = KVM_REG_PPC_VPA_ADDR;
752 reg.addr = (uintptr_t)&env->vpa_addr;
753 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
754 if (ret < 0) {
755 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
756 return ret;
757 }
758 }
759
760 return 0;
761 }
762 #endif /* TARGET_PPC64 */
763
764 int kvm_arch_put_registers(CPUState *cs, int level)
765 {
766 PowerPCCPU *cpu = POWERPC_CPU(cs);
767 CPUPPCState *env = &cpu->env;
768 struct kvm_regs regs;
769 int ret;
770 int i;
771
772 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
773 if (ret < 0) {
774 return ret;
775 }
776
777 regs.ctr = env->ctr;
778 regs.lr = env->lr;
779 regs.xer = cpu_read_xer(env);
780 regs.msr = env->msr;
781 regs.pc = env->nip;
782
783 regs.srr0 = env->spr[SPR_SRR0];
784 regs.srr1 = env->spr[SPR_SRR1];
785
786 regs.sprg0 = env->spr[SPR_SPRG0];
787 regs.sprg1 = env->spr[SPR_SPRG1];
788 regs.sprg2 = env->spr[SPR_SPRG2];
789 regs.sprg3 = env->spr[SPR_SPRG3];
790 regs.sprg4 = env->spr[SPR_SPRG4];
791 regs.sprg5 = env->spr[SPR_SPRG5];
792 regs.sprg6 = env->spr[SPR_SPRG6];
793 regs.sprg7 = env->spr[SPR_SPRG7];
794
795 regs.pid = env->spr[SPR_BOOKE_PID];
796
797     for (i = 0; i < 32; i++)
798 regs.gpr[i] = env->gpr[i];
799
800 regs.cr = 0;
801 for (i = 0; i < 8; i++) {
802 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
803 }
804
805 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
806 if (ret < 0)
807 return ret;
808
809 kvm_put_fp(cs);
810
811 if (env->tlb_dirty) {
812 kvm_sw_tlb_put(cpu);
813 env->tlb_dirty = false;
814 }
815
816 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
817 struct kvm_sregs sregs;
818
819 sregs.pvr = env->spr[SPR_PVR];
820
821 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
822
823 /* Sync SLB */
824 #ifdef TARGET_PPC64
825 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
826 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
827 if (env->slb[i].esid & SLB_ESID_V) {
828 sregs.u.s.ppc64.slb[i].slbe |= i;
829 }
830 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
831 }
832 #endif
833
834 /* Sync SRs */
835 for (i = 0; i < 16; i++) {
836 sregs.u.s.ppc32.sr[i] = env->sr[i];
837 }
838
839 /* Sync BATs */
840 for (i = 0; i < 8; i++) {
841 /* Beware. We have to swap upper and lower bits here */
842 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
843 | env->DBAT[1][i];
844 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
845 | env->IBAT[1][i];
846 }
847
848 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
849 if (ret) {
850 return ret;
851 }
852 }
853
854 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
855 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
856 }
857
858 if (cap_one_reg) {
859 int i;
860
861         /* We deliberately ignore errors here: for kernels which have
862          * the ONE_REG calls but don't support the specific
863 * registers, there's a reasonable chance things will still
864 * work, at least until we try to migrate. */
865 for (i = 0; i < 1024; i++) {
866 uint64_t id = env->spr_cb[i].one_reg_id;
867
868 if (id != 0) {
869 kvm_put_one_spr(cs, id, i);
870 }
871 }
872
873 #ifdef TARGET_PPC64
874 if (msr_ts) {
875 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
876 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
877 }
878 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
879 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
880 }
881 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
882 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
883 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
884 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
885 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
886 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
887 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
888 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
889 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
890 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
891 }
892
893 if (cap_papr) {
894 if (kvm_put_vpa(cs) < 0) {
895 DPRINTF("Warning: Unable to set VPA information to KVM\n");
896 }
897 }
898
899 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
900 #endif /* TARGET_PPC64 */
901 }
902
903 return ret;
904 }
905
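/* Recompute a BookE exception vector from its IVOR plus the IVPR base */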
906 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
907 {
908 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
909 }
910
911 int kvm_arch_get_registers(CPUState *cs)
912 {
913 PowerPCCPU *cpu = POWERPC_CPU(cs);
914 CPUPPCState *env = &cpu->env;
915 struct kvm_regs regs;
916 struct kvm_sregs sregs;
917 uint32_t cr;
918 int i, ret;
919
920 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
921 if (ret < 0)
922 return ret;
923
924 cr = regs.cr;
925 for (i = 7; i >= 0; i--) {
926 env->crf[i] = cr & 15;
927 cr >>= 4;
928 }
929
930 env->ctr = regs.ctr;
931 env->lr = regs.lr;
932 cpu_write_xer(env, regs.xer);
933 env->msr = regs.msr;
934 env->nip = regs.pc;
935
936 env->spr[SPR_SRR0] = regs.srr0;
937 env->spr[SPR_SRR1] = regs.srr1;
938
939 env->spr[SPR_SPRG0] = regs.sprg0;
940 env->spr[SPR_SPRG1] = regs.sprg1;
941 env->spr[SPR_SPRG2] = regs.sprg2;
942 env->spr[SPR_SPRG3] = regs.sprg3;
943 env->spr[SPR_SPRG4] = regs.sprg4;
944 env->spr[SPR_SPRG5] = regs.sprg5;
945 env->spr[SPR_SPRG6] = regs.sprg6;
946 env->spr[SPR_SPRG7] = regs.sprg7;
947
948 env->spr[SPR_BOOKE_PID] = regs.pid;
949
950     for (i = 0; i < 32; i++)
951 env->gpr[i] = regs.gpr[i];
952
953 kvm_get_fp(cs);
954
955 if (cap_booke_sregs) {
956 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
957 if (ret < 0) {
958 return ret;
959 }
960
961 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
962 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
963 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
964 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
965 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
966 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
967 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
968 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
969 env->spr[SPR_DECR] = sregs.u.e.dec;
970 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
971 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
972 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
973 }
974
975 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
976 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
977 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
978 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
979 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
980 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
981 }
982
983 if (sregs.u.e.features & KVM_SREGS_E_64) {
984 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
985 }
986
987 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
988 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
989 }
990
991 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
992 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
993 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
994 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
995 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
996 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
997 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
998 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
999 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1000 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1001 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1002 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1003 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1004 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1005 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1006 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1007 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1008 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1009 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1010 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1011 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1012 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1013 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1014 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1015 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1016 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1017 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1018 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1019 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1020 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1021 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1022 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1023 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1024
1025 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1026 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1027 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1028 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1029 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1030 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1031 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1032 }
1033
1034 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1035 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1036 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1037 }
1038
1039 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1040 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1041 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1042 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1043 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1044 }
1045 }
1046
1047 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1048 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1049 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1050 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1051 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1052 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1053 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1054 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1055 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1056 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1057 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1058 }
1059
1060 if (sregs.u.e.features & KVM_SREGS_EXP) {
1061 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1062 }
1063
1064 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1065 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1066 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1067 }
1068
1069 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1070 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1071 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1072 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1073
1074 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1075 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1076 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1077 }
1078 }
1079 }
1080
1081 if (cap_segstate) {
1082 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1083 if (ret < 0) {
1084 return ret;
1085 }
1086
1087 if (!env->external_htab) {
1088 ppc_store_sdr1(env, sregs.u.s.sdr1);
1089 }
1090
1091 /* Sync SLB */
1092 #ifdef TARGET_PPC64
1093 /*
1094 * The packed SLB array we get from KVM_GET_SREGS only contains
1095 * information about valid entries. So we flush our internal
1096 * copy to get rid of stale ones, then put all valid SLB entries
1097 * back in.
1098 */
1099 memset(env->slb, 0, sizeof(env->slb));
1100 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1101 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1102 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1103 /*
1104 * Only restore valid entries
1105 */
1106 if (rb & SLB_ESID_V) {
1107 ppc_store_slb(env, rb, rs);
1108 }
1109 }
1110 #endif
1111
1112 /* Sync SRs */
1113 for (i = 0; i < 16; i++) {
1114 env->sr[i] = sregs.u.s.ppc32.sr[i];
1115 }
1116
1117 /* Sync BATs */
1118 for (i = 0; i < 8; i++) {
1119 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1120 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1121 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1122 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1123 }
1124 }
1125
1126 if (cap_hior) {
1127 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1128 }
1129
1130 if (cap_one_reg) {
1131 int i;
1132
1133         /* We deliberately ignore errors here: for kernels which have
1134          * the ONE_REG calls but don't support the specific
1135 * registers, there's a reasonable chance things will still
1136 * work, at least until we try to migrate. */
1137 for (i = 0; i < 1024; i++) {
1138 uint64_t id = env->spr_cb[i].one_reg_id;
1139
1140 if (id != 0) {
1141 kvm_get_one_spr(cs, id, i);
1142 }
1143 }
1144
1145 #ifdef TARGET_PPC64
1146 if (msr_ts) {
1147 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1148 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1149 }
1150 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1151 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1152 }
1153 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1154 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1155 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1156 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1157 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1158 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1159 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1160 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1161 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1162 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1163 }
1164
1165 if (cap_papr) {
1166 if (kvm_get_vpa(cs) < 0) {
1167 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1168 }
1169 }
1170
1171 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1172 #endif
1173 }
1174
1175 return 0;
1176 }
1177
1178 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1179 {
1180 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1181
1182 if (irq != PPC_INTERRUPT_EXT) {
1183 return 0;
1184 }
1185
1186 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1187 return 0;
1188 }
1189
1190 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1191
1192 return 0;
1193 }
1194
1195 #if defined(TARGET_PPCEMB)
1196 #define PPC_INPUT_INT PPC40x_INPUT_INT
1197 #elif defined(TARGET_PPC64)
1198 #define PPC_INPUT_INT PPC970_INPUT_INT
1199 #else
1200 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1201 #endif
1202
1203 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1204 {
1205 PowerPCCPU *cpu = POWERPC_CPU(cs);
1206 CPUPPCState *env = &cpu->env;
1207 int r;
1208 unsigned irq;
1209
1210 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1211 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1212 if (!cap_interrupt_level &&
1213 run->ready_for_interrupt_injection &&
1214 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1215 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1216 {
1217 /* For now KVM disregards the 'irq' argument. However, in the
1218 * future KVM could cache it in-kernel to avoid a heavyweight exit
1219 * when reading the UIC.
1220 */
1221 irq = KVM_INTERRUPT_SET;
1222
1223 DPRINTF("injected interrupt %d\n", irq);
1224 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1225 if (r < 0) {
1226 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1227 }
1228
1229 /* Always wake up soon in case the interrupt was level based */
1230 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1231 (get_ticks_per_sec() / 50));
1232 }
1233
1234 /* We don't know if there are more interrupts pending after this. However,
1235 * the guest will return to userspace in the course of handling this one
1236      * anyway, so we will get a chance to deliver the rest. */
1237 }
1238
1239 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1240 {
1241 }
1242
1243 int kvm_arch_process_async_events(CPUState *cs)
1244 {
1245 return cs->halted;
1246 }
1247
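/* The guest idled: halt the vcpu unless an external interrupt is already
 * pending or external interrupts are disabled (MSR_EE clear) */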
1248 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1249 {
1250 CPUState *cs = CPU(cpu);
1251 CPUPPCState *env = &cpu->env;
1252
1253 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1254 cs->halted = 1;
1255 cs->exception_index = EXCP_HLT;
1256 }
1257
1258 return 0;
1259 }
1260
1261 /* map dcr access to existing qemu dcr emulation */
1262 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1263 {
1264 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1265 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1266
1267 return 0;
1268 }
1269
1270 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1271 {
1272 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1273 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1274
1275 return 0;
1276 }
1277
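/* Software breakpoint: save the original instruction and patch in the
 * opcode that KVM advertised through KVM_REG_PPC_DEBUG_INST */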
1278 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1279 {
1280 /* Mixed endian case is not handled */
1281 uint32_t sc = debug_inst_opcode;
1282
1283 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1284 sizeof(sc), 0) ||
1285 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1286 return -EINVAL;
1287 }
1288
1289 return 0;
1290 }
1291
1292 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1293 {
1294 uint32_t sc;
1295
1296 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1297 sc != debug_inst_opcode ||
1298 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1299 sizeof(sc), 1)) {
1300 return -EINVAL;
1301 }
1302
1303 return 0;
1304 }
1305
1306 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1307 {
1308 /* Software Breakpoint updates */
1309 if (kvm_sw_breakpoints_active(cs)) {
1310 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1311 }
1312 }
1313
1314 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1315 {
1316 CPUState *cs = CPU(cpu);
1317 CPUPPCState *env = &cpu->env;
1318 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1319 int handle = 0;
1320
1321 if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1322 handle = 1;
1323 } else {
1324         /* QEMU is not able to handle this debug exception, so inject
1325          * a program exception into the guest;
1326          * yes, a program exception, NOT a debug exception!
1327          * For software breakpoints QEMU uses a privileged instruction,
1328          * so there is no way we got here because the guest raised a
1329          * debug exception; the only possibility is that the guest
1330          * executed a privileged / illegal instruction, which is why we
1331          * inject a program interrupt.
1332 */
1333
1334 cpu_synchronize_state(cs);
1335 /* env->nip is PC, so increment this by 4 to use
1336          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1337 */
1338 env->nip += 4;
1339 cs->exception_index = POWERPC_EXCP_PROGRAM;
1340 env->error_code = POWERPC_EXCP_INVAL;
1341 ppc_cpu_do_interrupt(cs);
1342 }
1343
1344 return handle;
1345 }
1346
1347 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1348 {
1349 PowerPCCPU *cpu = POWERPC_CPU(cs);
1350 CPUPPCState *env = &cpu->env;
1351 int ret;
1352
1353 switch (run->exit_reason) {
1354 case KVM_EXIT_DCR:
1355 if (run->dcr.is_write) {
1356 DPRINTF("handle dcr write\n");
1357 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1358 } else {
1359 DPRINTF("handle dcr read\n");
1360 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1361 }
1362 break;
1363 case KVM_EXIT_HLT:
1364 DPRINTF("handle halt\n");
1365 ret = kvmppc_handle_halt(cpu);
1366 break;
1367 #if defined(TARGET_PPC64)
1368 case KVM_EXIT_PAPR_HCALL:
1369 DPRINTF("handle PAPR hypercall\n");
1370 run->papr_hcall.ret = spapr_hypercall(cpu,
1371 run->papr_hcall.nr,
1372 run->papr_hcall.args);
1373 ret = 0;
1374 break;
1375 #endif
1376 case KVM_EXIT_EPR:
1377 DPRINTF("handle epr\n");
1378 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1379 ret = 0;
1380 break;
1381 case KVM_EXIT_WATCHDOG:
1382 DPRINTF("handle watchdog expiry\n");
1383 watchdog_perform_action();
1384 ret = 0;
1385 break;
1386
1387 case KVM_EXIT_DEBUG:
1388 DPRINTF("handle debug exception\n");
1389 if (kvm_handle_debug(cpu, run)) {
1390 ret = EXCP_DEBUG;
1391 break;
1392 }
1393 /* re-enter, this exception was guest-internal */
1394 ret = 0;
1395 break;
1396
1397 default:
1398 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1399 ret = -1;
1400 break;
1401 }
1402
1403 return ret;
1404 }
1405
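/* BookE watchdog helpers: OR bits into / clear bits from the guest TSR
 * through the dedicated ONE_REG pseudo-registers */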
1406 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1407 {
1408 CPUState *cs = CPU(cpu);
1409 uint32_t bits = tsr_bits;
1410 struct kvm_one_reg reg = {
1411 .id = KVM_REG_PPC_OR_TSR,
1412 .addr = (uintptr_t) &bits,
1413 };
1414
1415 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1416 }
1417
1418 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1419 {
1420
1421 CPUState *cs = CPU(cpu);
1422 uint32_t bits = tsr_bits;
1423 struct kvm_one_reg reg = {
1424 .id = KVM_REG_PPC_CLEAR_TSR,
1425 .addr = (uintptr_t) &bits,
1426 };
1427
1428 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1429 }
1430
1431 int kvmppc_set_tcr(PowerPCCPU *cpu)
1432 {
1433 CPUState *cs = CPU(cpu);
1434 CPUPPCState *env = &cpu->env;
1435 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1436
1437 struct kvm_one_reg reg = {
1438 .id = KVM_REG_PPC_TCR,
1439 .addr = (uintptr_t) &tcr,
1440 };
1441
1442 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1443 }
1444
1445 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1446 {
1447 CPUState *cs = CPU(cpu);
1448 int ret;
1449
1450 if (!kvm_enabled()) {
1451 return -1;
1452 }
1453
1454 if (!cap_ppc_watchdog) {
1455         printf("warning: KVM does not support watchdog\n");
1456 return -1;
1457 }
1458
1459 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1460 if (ret < 0) {
1461 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1462 __func__, strerror(-ret));
1463 return ret;
1464 }
1465
1466 return ret;
1467 }
1468
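/* Copy the first /proc/cpuinfo line starting with @field into @value;
 * returns 0 on success, -1 if the field is not found */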
1469 static int read_cpuinfo(const char *field, char *value, int len)
1470 {
1471 FILE *f;
1472 int ret = -1;
1473 int field_len = strlen(field);
1474 char line[512];
1475
1476 f = fopen("/proc/cpuinfo", "r");
1477 if (!f) {
1478 return -1;
1479 }
1480
1481 do {
1482 if (!fgets(line, sizeof(line), f)) {
1483 break;
1484 }
1485 if (!strncmp(line, field, field_len)) {
1486 pstrcpy(value, len, line);
1487 ret = 0;
1488 break;
1489 }
1490 } while(*line);
1491
1492 fclose(f);
1493
1494 return ret;
1495 }
1496
1497 uint32_t kvmppc_get_tbfreq(void)
1498 {
1499 char line[512];
1500 char *ns;
1501 uint32_t retval = get_ticks_per_sec();
1502
1503 if (read_cpuinfo("timebase", line, sizeof(line))) {
1504 return retval;
1505 }
1506
1507 if (!(ns = strchr(line, ':'))) {
1508 return retval;
1509 }
1510
1511 ns++;
1512
1513 retval = atoi(ns);
1514 return retval;
1515 }
1516
1517 bool kvmppc_get_host_serial(char **value)
1518 {
1519 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1520 NULL);
1521 }
1522
1523 bool kvmppc_get_host_model(char **value)
1524 {
1525 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1526 }
1527
1528 /* Try to find a device tree node for a CPU with clock-frequency property */
1529 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1530 {
1531 struct dirent *dirp;
1532 DIR *dp;
1533
1534 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1535 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1536 return -1;
1537 }
1538
1539 buf[0] = '\0';
1540 while ((dirp = readdir(dp)) != NULL) {
1541 FILE *f;
1542 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1543 dirp->d_name);
1544 f = fopen(buf, "r");
1545 if (f) {
1546 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1547 fclose(f);
1548 break;
1549 }
1550 buf[0] = '\0';
1551 }
1552 closedir(dp);
1553 if (buf[0] == '\0') {
1554 printf("Unknown host!\n");
1555 return -1;
1556 }
1557
1558 return 0;
1559 }
1560
1561 /* Read a CPU node property from the host device tree that's a single
1562  * integer (32-bit or 64-bit).  Returns -1 if the property can't be
1563  * found or opened, and 0 if the content doesn't have a recognized
1564  * format */
1565 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1566 {
1567 char buf[PATH_MAX];
1568 union {
1569 uint32_t v32;
1570 uint64_t v64;
1571 } u;
1572 FILE *f;
1573 int len;
1574
1575 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1576 return -1;
1577 }
1578
1579     strncat(buf, "/", sizeof(buf) - strlen(buf) - 1);
1580     strncat(buf, propname, sizeof(buf) - strlen(buf) - 1);
1581
1582 f = fopen(buf, "rb");
1583 if (!f) {
1584 return -1;
1585 }
1586
1587 len = fread(&u, 1, sizeof(u), f);
1588 fclose(f);
1589 switch (len) {
1590 case 4:
1591 /* property is a 32-bit quantity */
1592 return be32_to_cpu(u.v32);
1593 case 8:
1594 return be64_to_cpu(u.v64);
1595 }
1596
1597 return 0;
1598 }
1599
1600 uint64_t kvmppc_get_clockfreq(void)
1601 {
1602 return kvmppc_read_int_cpu_dt("clock-frequency");
1603 }
1604
1605 uint32_t kvmppc_get_vmx(void)
1606 {
1607 return kvmppc_read_int_cpu_dt("ibm,vmx");
1608 }
1609
1610 uint32_t kvmppc_get_dfp(void)
1611 {
1612 return kvmppc_read_int_cpu_dt("ibm,dfp");
1613 }
1614
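/* Fill @pvinfo via KVM_PPC_GET_PVINFO; returns 0 on success, 1 if the
 * capability or the ioctl is unavailable */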
1615 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1616 {
1617 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1618 CPUState *cs = CPU(cpu);
1619
1620 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1621 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1622 return 0;
1623 }
1624
1625 return 1;
1626 }
1627
1628 int kvmppc_get_hasidle(CPUPPCState *env)
1629 {
1630 struct kvm_ppc_pvinfo pvinfo;
1631
1632 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1633 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1634 return 1;
1635 }
1636
1637 return 0;
1638 }
1639
1640 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1641 {
1642 uint32_t *hc = (uint32_t*)buf;
1643 struct kvm_ppc_pvinfo pvinfo;
1644
1645 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1646 memcpy(buf, pvinfo.hcall, buf_len);
1647 return 0;
1648 }
1649
1650 /*
1651 * Fallback to always fail hypercalls regardless of endianness:
1652 *
1653 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1654 * li r3, -1
1655 * b .+8 (becomes nop in wrong endian)
1656 * bswap32(li r3, -1)
1657 */
1658
1659 hc[0] = cpu_to_be32(0x08000048);
1660 hc[1] = cpu_to_be32(0x3860ffff);
1661 hc[2] = cpu_to_be32(0x48000008);
1662 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1663
1664 return 0;
1665 }
1666
1667 void kvmppc_set_papr(PowerPCCPU *cpu)
1668 {
1669 CPUState *cs = CPU(cpu);
1670 int ret;
1671
1672 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1673 if (ret) {
1674 cpu_abort(cs, "This KVM version does not support PAPR\n");
1675 }
1676
1677 /* Update the capability flag so we sync the right information
1678 * with kvm */
1679 cap_papr = 1;
1680 }
1681
1682 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
1683 {
1684 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
1685 }
1686
1687 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1688 {
1689 CPUState *cs = CPU(cpu);
1690 int ret;
1691
1692 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
1693 if (ret && mpic_proxy) {
1694 cpu_abort(cs, "This KVM version does not support EPR\n");
1695 }
1696 }
1697
1698 int kvmppc_smt_threads(void)
1699 {
1700 return cap_ppc_smt ? cap_ppc_smt : 1;
1701 }
1702
1703 #ifdef TARGET_PPC64
1704 off_t kvmppc_alloc_rma(void **rma)
1705 {
1706 off_t size;
1707 int fd;
1708 struct kvm_allocate_rma ret;
1709
1710 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1711 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1712 * not necessary on this hardware
1713 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1714 *
1715 * FIXME: We should allow the user to force contiguous RMA
1716 * allocation in the cap_ppc_rma==1 case.
1717 */
1718 if (cap_ppc_rma < 2) {
1719 return 0;
1720 }
1721
1722 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1723 if (fd < 0) {
1724 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1725 strerror(errno));
1726 return -1;
1727 }
1728
1729 size = MIN(ret.rma_size, 256ul << 20);
1730
1731 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1732 if (*rma == MAP_FAILED) {
1733 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1734 return -1;
1735     }
1736
1737 return size;
1738 }
1739
1740 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1741 {
1742 struct kvm_ppc_smmu_info info;
1743 long rampagesize, best_page_shift;
1744 int i;
1745
1746 if (cap_ppc_rma >= 2) {
1747 return current_size;
1748 }
1749
1750 /* Find the largest hardware supported page size that's less than
1751 * or equal to the (logical) backing page size of guest RAM */
1752 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
1753 rampagesize = getrampagesize();
1754 best_page_shift = 0;
1755
1756 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1757 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
1758
1759 if (!sps->page_shift) {
1760 continue;
1761 }
1762
1763 if ((sps->page_shift > best_page_shift)
1764 && ((1UL << sps->page_shift) <= rampagesize)) {
1765 best_page_shift = sps->page_shift;
1766 }
1767 }
1768
1769 return MIN(current_size,
1770 1ULL << (best_page_shift + hash_shift - 7));
1771 }
1772 #endif
1773
1774 bool kvmppc_spapr_use_multitce(void)
1775 {
1776 return cap_spapr_multitce;
1777 }
1778
1779 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
1780 bool vfio_accel)
1781 {
1782 struct kvm_create_spapr_tce args = {
1783 .liobn = liobn,
1784 .window_size = window_size,
1785 };
1786 long len;
1787 int fd;
1788 void *table;
1789
1790 /* Must set fd to -1 so we don't try to munmap when called for
1791 * destroying the table, which the upper layers -will- do
1792 */
1793 *pfd = -1;
1794 if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) {
1795 return NULL;
1796 }
1797
1798 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1799 if (fd < 0) {
1800 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1801 liobn);
1802 return NULL;
1803 }
1804
1805 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
1806 /* FIXME: round this up to page size */
1807
1808 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1809 if (table == MAP_FAILED) {
1810 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1811 liobn);
1812 close(fd);
1813 return NULL;
1814 }
1815
1816 *pfd = fd;
1817 return table;
1818 }
1819
1820 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
1821 {
1822 long len;
1823
1824 if (fd < 0) {
1825 return -1;
1826 }
1827
1828 len = nb_table * sizeof(uint64_t);
1829 if ((munmap(table, len) < 0) ||
1830 (close(fd) < 0)) {
1831 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1832 strerror(errno));
1833 /* Leak the table */
1834 }
1835
1836 return 0;
1837 }
1838
1839 int kvmppc_reset_htab(int shift_hint)
1840 {
1841 uint32_t shift = shift_hint;
1842
1843 if (!kvm_enabled()) {
1844 /* Full emulation, tell caller to allocate htab itself */
1845 return 0;
1846 }
1847 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1848 int ret;
1849 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1850 if (ret == -ENOTTY) {
1851 /* At least some versions of PR KVM advertise the
1852 * capability, but don't implement the ioctl(). Oops.
1853 * Return 0 so that we allocate the htab in qemu, as is
1854 * correct for PR. */
1855 return 0;
1856 } else if (ret < 0) {
1857 return ret;
1858 }
1859 return shift;
1860 }
1861
1862 /* We have a kernel that predates the htab reset calls. For PR
1863      * KVM, we need to allocate the htab ourselves; an HV KVM of
1864      * this era has already allocated a fixed-size 16MB hash
1865      * table.  Kernels of this era have the GET_PVINFO capability
1866 * only on PR, so we use this hack to determine the right
1867 * answer */
1868 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1869 /* PR - tell caller to allocate htab */
1870 return 0;
1871 } else {
1872 /* HV - assume 16MB kernel allocated htab */
1873 return 24;
1874 }
1875 }
1876
1877 static inline uint32_t mfpvr(void)
1878 {
1879 uint32_t pvr;
1880
1881 asm ("mfpvr %0"
1882 : "=r"(pvr));
1883 return pvr;
1884 }
1885
1886 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
1887 {
1888 if (on) {
1889 *word |= flags;
1890 } else {
1891 *word &= ~flags;
1892 }
1893 }
1894
1895 static void kvmppc_host_cpu_initfn(Object *obj)
1896 {
1897 assert(kvm_enabled());
1898 }
1899
1900 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1901 {
1902 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1903 uint32_t vmx = kvmppc_get_vmx();
1904 uint32_t dfp = kvmppc_get_dfp();
1905 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
1906 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
1907
1908 /* Now fix up the class with information we can query from the host */
1909 pcc->pvr = mfpvr();
1910
1911 if (vmx != -1) {
1912 /* Only override when we know what the host supports */
1913 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1914 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1915 }
1916 if (dfp != -1) {
1917 /* Only override when we know what the host supports */
1918 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1919 }
1920
1921 if (dcache_size != -1) {
1922 pcc->l1_dcache_size = dcache_size;
1923 }
1924
1925 if (icache_size != -1) {
1926 pcc->l1_icache_size = icache_size;
1927 }
1928 }
1929
1930 bool kvmppc_has_cap_epr(void)
1931 {
1932 return cap_epr;
1933 }
1934
1935 bool kvmppc_has_cap_htab_fd(void)
1936 {
1937 return cap_htab_fd;
1938 }
1939
1940 bool kvmppc_has_cap_fixup_hcalls(void)
1941 {
1942 return cap_fixup_hcalls;
1943 }
1944
1945 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
1946 {
1947 ObjectClass *oc = OBJECT_CLASS(pcc);
1948
1949 while (oc && !object_class_is_abstract(oc)) {
1950 oc = object_class_get_parent(oc);
1951 }
1952 assert(oc);
1953
1954 return POWERPC_CPU_CLASS(oc);
1955 }
1956
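/* Register the "host" CPU class matching the PVR we are running on, plus
 * a generic alias for the whole CPU family */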
1957 static int kvm_ppc_register_host_cpu_type(void)
1958 {
1959 TypeInfo type_info = {
1960 .name = TYPE_HOST_POWERPC_CPU,
1961 .instance_init = kvmppc_host_cpu_initfn,
1962 .class_init = kvmppc_host_cpu_class_init,
1963 };
1964 uint32_t host_pvr = mfpvr();
1965 PowerPCCPUClass *pvr_pcc;
1966 DeviceClass *dc;
1967
1968 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1969 if (pvr_pcc == NULL) {
1970 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
1971 }
1972 if (pvr_pcc == NULL) {
1973 return -1;
1974 }
1975 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1976 type_register(&type_info);
1977
1978 /* Register generic family CPU class for a family */
1979 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
1980 dc = DEVICE_CLASS(pvr_pcc);
1981 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1982 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
1983 type_register(&type_info);
1984
1985 return 0;
1986 }
1987
1988 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
1989 {
1990 struct kvm_rtas_token_args args = {
1991 .token = token,
1992 };
1993
1994 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
1995 return -ENOENT;
1996 }
1997
1998 strncpy(args.name, function, sizeof(args.name));
1999
2000 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2001 }
2002
2003 int kvmppc_get_htab_fd(bool write)
2004 {
2005 struct kvm_get_htab_fd s = {
2006 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2007 .start_index = 0,
2008 };
2009
2010 if (!cap_htab_fd) {
2011 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2012 return -1;
2013 }
2014
2015 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2016 }
2017
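/* Stream hash table chunks from the KVM HTAB fd into the migration stream;
 * returns 1 once the fd reports end of data, 0 if max_ns expired first */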
2018 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2019 {
2020 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2021 uint8_t buf[bufsize];
2022 ssize_t rc;
2023
2024 do {
2025 rc = read(fd, buf, bufsize);
2026 if (rc < 0) {
2027 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2028 strerror(errno));
2029 return rc;
2030 } else if (rc) {
2031             /* Kernel already returns data in BE format for the file */
2032 qemu_put_buffer(f, buf, rc);
2033 }
2034 } while ((rc != 0)
2035 && ((max_ns < 0)
2036 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2037
2038 return (rc == 0) ? 1 : 0;
2039 }
2040
2041 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2042 uint16_t n_valid, uint16_t n_invalid)
2043 {
2044 struct kvm_get_htab_header *buf;
2045 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2046 ssize_t rc;
2047
2048 buf = alloca(chunksize);
2049 /* This is KVM on ppc, so this is all big-endian */
2050 buf->index = index;
2051 buf->n_valid = n_valid;
2052 buf->n_invalid = n_invalid;
2053
2054 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2055
2056 rc = write(fd, buf, chunksize);
2057 if (rc < 0) {
2058 fprintf(stderr, "Error writing KVM hash table: %s\n",
2059 strerror(errno));
2060 return rc;
2061 }
2062 if (rc != chunksize) {
2063 /* We should never get a short write on a single chunk */
2064 fprintf(stderr, "Short write, restoring KVM hash table\n");
2065 return -1;
2066 }
2067 return 0;
2068 }
2069
2070 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2071 {
2072 return true;
2073 }
2074
2075 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2076 {
2077 return 1;
2078 }
2079
2080 int kvm_arch_on_sigbus(int code, void *addr)
2081 {
2082 return 1;
2083 }
2084
2085 void kvm_arch_init_irq_routing(KVMState *s)
2086 {
2087 }
2088
2089 int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
2090 {
2091 return -EINVAL;
2092 }
2093
2094 int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
2095 {
2096 return -EINVAL;
2097 }
2098
2099 void kvm_arch_remove_all_hw_breakpoints(void)
2100 {
2101 }
2102
2103 struct kvm_get_htab_buf {
2104 struct kvm_get_htab_header header;
2105 /*
2106      * We require one extra entry for the read
2107 */
2108 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2109 };
2110
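/* Read one HPTE group starting at @pte_index through a temporary HTAB fd;
 * the returned token wraps the buffer and must be released with
 * kvmppc_hash64_free_pteg() */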
2111 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2112 {
2113 int htab_fd;
2114 struct kvm_get_htab_fd ghf;
2115 struct kvm_get_htab_buf *hpte_buf;
2116
2117 ghf.flags = 0;
2118 ghf.start_index = pte_index;
2119 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2120 if (htab_fd < 0) {
2121 goto error_out;
2122 }
2123
2124 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2125 /*
2126 * Read the hpte group
2127 */
2128 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2129 goto out_close;
2130 }
2131
2132 close(htab_fd);
2133 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2134
2135 out_close:
2136 g_free(hpte_buf);
2137 close(htab_fd);
2138 error_out:
2139 return 0;
2140 }
2141
2142 void kvmppc_hash64_free_pteg(uint64_t token)
2143 {
2144 struct kvm_get_htab_buf *htab_buf;
2145
2146 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2147 hpte);
2148 g_free(htab_buf);
2149 return;
2150 }
2151
2152 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2153 target_ulong pte0, target_ulong pte1)
2154 {
2155 int htab_fd;
2156 struct kvm_get_htab_fd ghf;
2157 struct kvm_get_htab_buf hpte_buf;
2158
2159 ghf.flags = 0;
2160 ghf.start_index = 0; /* Ignored */
2161 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2162 if (htab_fd < 0) {
2163 goto error_out;
2164 }
2165
2166 hpte_buf.header.n_valid = 1;
2167 hpte_buf.header.n_invalid = 0;
2168 hpte_buf.header.index = pte_index;
2169 hpte_buf.hpte[0] = pte0;
2170 hpte_buf.hpte[1] = pte1;
2171 /*
2172 * Write the hpte entry.
2173 * CAUTION: write() has the warn_unused_result attribute. Hence we
2174 * need to check the return value, even though we do nothing.
2175 */
2176 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2177 goto out_close;
2178 }
2179
2180 out_close:
2181 close(htab_fd);
2182 return;
2183
2184 error_out:
2185 return;
2186 }