/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <dirent.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
#include "cpu.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/ppc/ppc.h"
#include "sysemu/watchdog.h"
#include "trace.h"
#include "exec/gdbstub.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;

static uint32_t debug_inst_opcode;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}

static int kvm_ppc_register_host_cpu_type(void);

int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = false;
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}

static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we risk misleading users into thinking they can
           run BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}
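
/* Once the shared array is registered, QEMU modifies its copy in
 * env->tlb.tlbm and sets env->tlb_dirty; kvm_arch_put_registers() then
 * pushes the dirty entries back to KVM via kvm_sw_tlb_put() below.
 * As an illustration (exact sizes are target-dependent, e.g. an
 * e500mc-style layout): a 512-entry TLB0 plus a 64-entry TLB1 gives
 * entries == 576 and cfg.array_len == 576 * sizeof(ppcmas_tlb_t).
 */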


#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 */
        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}

static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}

static long getrampagesize(void)
{
    struct statfs fs;
    int ret;

    if (!mem_path) {
        /* guest RAM is backed by normal anonymous pages */
        return getpagesize();
    }

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC 0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's a hugepage, return the huge page size */
    return fs.f_bsize;
}

static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}
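
/* For example, with KVM_PPC_PAGE_SIZES_REAL set and guest RAM backed by
 * ordinary 4K anonymous pages (rampgsize == 4096), only shift == 12
 * passes the check above; 64K (shift 16) and 16M (shift 24) pages are
 * rejected. With a 16M hugetlbfs backing, all three would pass.
 */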

static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    /*
     * XXX This loop should be an entry wide AND of the capabilities that
     *     the selected CPU has with the capabilities that KVM supports.
     */
    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

#endif /* !defined (TARGET_PPC64) */

unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
}

/* e500 supports 2 h/w breakpoints and 2 watchpoints.
 * book3s supports only 1 watchpoint, so an array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/* By default there are no breakpoints or watchpoints supported */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;

static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
{
    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        max_hw_breakpoint = 2;
        max_hw_watchpoint = 2;
    }

    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
        fprintf(stderr, "Error initializing h/w breakpoints\n");
        return;
    }
}
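
/* On non-BookE models the max_hw_* counters stay at zero, so
 * kvm_arch_insert_hw_breakpoint() below returns -ENOBUFS for any
 * hardware breakpoint or watchpoint request and the gdbstub has to
 * fall back to software breakpoints.
 */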

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        return ret;
    }

    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);
        break;
    default:
        break;
    }

    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
    kvmppc_hw_debug_points_init(cenv);

    return ret;
}

static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}
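
/* The bitmap above marks every TLB entry dirty (one bit per entry,
 * rounded up to whole bytes), forcing KVM to re-read the complete
 * shared array. E.g. for nb_tlb == 576 entries this is a 72-byte
 * bitmap set to all ones.
 */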

static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_get(spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}

static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}
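
/* These helpers size the transfer from the ONE_REG id itself; e.g.
 * kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR), as used in
 * kvm_arch_put_registers() below, copies SPR_HIOR out of env->spr[]
 * with the width encoded in KVM_REG_PPC_HIOR.
 */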

static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
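
/* Note the layout used above: each VSX register is transferred as a
 * doubleword pair, with vsr[0] holding the classic FPR value and
 * vsr[1] the VSX extension. On non-VSX CPUs only vsr[0] is consumed,
 * since KVM_REG_PPC_FPR(i) is a 64-bit register id.
 */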

static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

#if defined(TARGET_PPC64)
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}

static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered. That means when restoring state, if a VPA *is*
     * registered, we need to set that up first. If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */

int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }
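
    /* Example of the packing above: CR field 0 (env->crf[0]) lands in
     * bits 31..28 of regs.cr, so crf[0] == 0x8 (LT set) yields
     * regs.cr == 0x80000000 when all other fields are zero.
     */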

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            if (env->slb[i].esid & SLB_ESID_V) {
                sregs.u.s.ppc64.slb[i].slbe |= i;
            }
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            /* Beware. We have to swap upper and lower bits here */
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
                | env->DBAT[1][i];
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
                | env->IBAT[1][i];
        }

        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
        if (ret) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here: for kernels which have
         * the ONE_REG calls but don't support the specific registers,
         * there's a reasonable chance things will still work, at least
         * until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (msr_ts) {
            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            }
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            }
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
        }

        if (cap_papr) {
            if (kvm_put_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to set VPA information to KVM\n");
            }
        }

        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif /* TARGET_PPC64 */
    }

    return ret;
}

static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
{
    env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
}
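
/* For instance, with IVPR == 0xfff00000 and IVOR2 == 0x60, the DSI
 * vector recomputed here is 0xfff00060. For typical values, where the
 * IVOR offset bits don't overlap the IVPR base, this addition matches
 * the architected IVPR|IVOR concatenation.
 */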

int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    kvm_get_fp(cs);

    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
            kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
                kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
                kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
                kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (!env->external_htab) {
            ppc_store_sdr1(env, sregs.u.s.sdr1);
        }

        /* Sync SLB */
#ifdef TARGET_PPC64
        /*
         * The packed SLB array we get from KVM_GET_SREGS only contains
         * information about valid entries. So we flush our internal
         * copy to get rid of stale ones, then put all valid SLB entries
         * back in.
         */
        memset(env->slb, 0, sizeof(env->slb));
        for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
            target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
            target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
            /*
             * Only restore valid entries
             */
            if (rb & SLB_ESID_V) {
                ppc_store_slb(env, rb, rs);
            }
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here: for kernels which have
         * the ONE_REG calls but don't support the specific registers,
         * there's a reasonable chance things will still work, at least
         * until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_get_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (msr_ts) {
            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            }
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            }
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
        }

        if (cap_papr) {
            if (kvm_get_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to get VPA information from KVM\n");
            }
        }

        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif
    }

    return 0;
}

int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}

#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1 << PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        DPRINTF("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                  (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyway, so we will get a chance to deliver the rest. */
}

void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
}

int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}

static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        cs->exception_index = EXCP_HLT;
    }

    return 0;
}

/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    /* Mixed endian case is not handled */
    uint32_t sc = debug_inst_opcode;

    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
                            sizeof(sc), 0) ||
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
        return -EINVAL;
    }

    return 0;
}

int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    uint32_t sc;

    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
        sc != debug_inst_opcode ||
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
                            sizeof(sc), 1)) {
        return -EINVAL;
    }

    return 0;
}

static int find_hw_breakpoint(target_ulong addr, int type)
{
    int n;

    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));

    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
        if (hw_debug_points[n].addr == addr &&
            hw_debug_points[n].type == type) {
            return n;
        }
    }

    return -1;
}

static int find_hw_watchpoint(target_ulong addr, int *flag)
{
    int n;

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
    if (n >= 0) {
        *flag = BP_MEM_ACCESS;
        return n;
    }

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
    if (n >= 0) {
        *flag = BP_MEM_WRITE;
        return n;
    }

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
    if (n >= 0) {
        *flag = BP_MEM_READ;
        return n;
    }

    return -1;
}

int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
        return -ENOBUFS;
    }

    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;

    switch (type) {
    case GDB_BREAKPOINT_HW:
        if (nb_hw_breakpoint >= max_hw_breakpoint) {
            return -ENOBUFS;
        }

        if (find_hw_breakpoint(addr, type) >= 0) {
            return -EEXIST;
        }

        nb_hw_breakpoint++;
        break;

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        if (nb_hw_watchpoint >= max_hw_watchpoint) {
            return -ENOBUFS;
        }

        if (find_hw_breakpoint(addr, type) >= 0) {
            return -EEXIST;
        }

        nb_hw_watchpoint++;
        break;

    default:
        return -ENOSYS;
    }

    return 0;
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    int n;

    n = find_hw_breakpoint(addr, type);
    if (n < 0) {
        return -ENOENT;
    }

    switch (type) {
    case GDB_BREAKPOINT_HW:
        nb_hw_breakpoint--;
        break;

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        nb_hw_watchpoint--;
        break;

    default:
        return -ENOSYS;
    }
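    /* The table is kept dense: the freed slot is overwritten with the
     * (now out-of-range) last entry, so no per-slot valid flag is needed
     * and lookups can stop at nb_hw_breakpoint + nb_hw_watchpoint.
     */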
    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];

    return 0;
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
    nb_hw_breakpoint = nb_hw_watchpoint = 0;
}

void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
{
    int n;

    /* Software Breakpoint updates */
    if (kvm_sw_breakpoints_active(cs)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }

    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));
    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));

    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
            switch (hw_debug_points[n].type) {
            case GDB_BREAKPOINT_HW:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
                break;
            case GDB_WATCHPOINT_WRITE:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
                break;
            case GDB_WATCHPOINT_READ:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
                break;
            case GDB_WATCHPOINT_ACCESS:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
                                       KVMPPC_DEBUG_WATCH_READ;
                break;
            default:
                cpu_abort(cs, "Unsupported breakpoint type\n");
            }
            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
        }
    }
}

static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
    int handle = 0;
    int n;
    int flag = 0;

    if (cs->singlestep_enabled) {
        handle = 1;
    } else if (arch_info->status) {
        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
                if (n >= 0) {
                    handle = 1;
                }
            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
                                            KVMPPC_DEBUG_WATCH_WRITE)) {
                n = find_hw_watchpoint(arch_info->address, &flag);
                if (n >= 0) {
                    handle = 1;
                    cs->watchpoint_hit = &hw_watchpoint;
                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
                    hw_watchpoint.flags = flag;
                }
            }
        }
    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
        handle = 1;
    } else {
        /* QEMU is not able to handle this debug exception, so inject a
         * program exception into the guest - yes, a program exception,
         * NOT a debug exception!
         * When QEMU uses the debug resources, the debug exception must
         * always be enabled; to achieve this we set MSR_DE and also set
         * MSRP_DEP so the guest cannot change MSR_DE.
         * When emulating debug resources for the guest, we would instead
         * want the guest to control MSR_DE (to enable/disable the debug
         * interrupt as needed).
         * Supporting both configurations at once is not possible, so
         * debug resources cannot be shared between QEMU and the guest
         * on the BookE architecture. In the current design QEMU gets
         * priority over the guest: if QEMU is using the debug resources
         * then the guest cannot use them.
         * For software breakpoints QEMU uses a privileged instruction, so
         * there is no way we can be here because the guest set up a debug
         * exception; the only possibility is that the guest executed a
         * privileged / illegal instruction, and that's why we inject a
         * program interrupt.
         */

        cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
         */
        env->nip += 4;
        cs->exception_index = POWERPC_EXCP_PROGRAM;
        env->error_code = POWERPC_EXCP_INVAL;
        ppc_cpu_do_interrupt(cs);
    }

    return handle;
}

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    case KVM_EXIT_DEBUG:
        DPRINTF("handle debug exception\n");
        if (kvm_handle_debug(cpu, run)) {
            ret = EXCP_DEBUG;
            break;
        }
        /* re-enter, this exception was guest-internal */
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}

int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (!kvm_enabled()) {
        return -1;
    }

    if (!cap_ppc_watchdog) {
        printf("warning: KVM does not support watchdog\n");
        return -1;
    }

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    return ret;
}

static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}
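
/* read_cpuinfo() copies the whole matching /proc/cpuinfo line, e.g.
 * read_cpuinfo("timebase", line, sizeof(line)) below leaves something
 * like "timebase : 512000000" in 'line'; callers then parse past the
 * ':' themselves.
 */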

uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}

bool kvmppc_get_host_serial(char **value)
{
    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
                               NULL);
}

bool kvmppc_get_host_model(char **value)
{
    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
}

/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}

/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit). Returns -1 if the property can't be
 * found or opened, and 0 if the format isn't understood. */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX], *tmp;
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    tmp = g_strdup_printf("%s/%s", buf, propname);

    f = fopen(tmp, "rb");
    g_free(tmp);
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}
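
/* Device tree properties are stored big-endian, hence the be32/be64
 * conversions above. For example, a 4-byte "clock-frequency" property
 * read on a little-endian host must be byte-swapped before use.
 */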

uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}

static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
{
    PowerPCCPU *cpu = ppc_env_get_cpu(env);
    CPUState *cs = CPU(cpu);

    if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
        return 0;
    }

    return 1;
}

int kvmppc_get_hasidle(CPUPPCState *env)
{
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
        return 1;
    }

    return 0;
}

int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t *)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls regardless of endianness:
     *
     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
     *     li r3, -1
     *     b .+8 (becomes nop in wrong endian)
     *     bswap32(li r3, -1)
     */

    hc[0] = cpu_to_be32(0x08000048);
    hc[1] = cpu_to_be32(0x3860ffff);
    hc[2] = cpu_to_be32(0x48000008);
    hc[3] = cpu_to_be32(bswap32(0x3860ffff));

    return 0;
}

void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
    if (ret) {
        cpu_abort(cs, "This KVM version does not support PAPR\n");
    }

    /* Update the capability flag so we sync the right information
     * with kvm */
    cap_papr = 1;
}

int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
{
    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
}

void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUState *cs = CPU(cpu);
    int ret;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
    if (ret && mpic_proxy) {
        cpu_abort(cs, "This KVM version does not support EPR\n");
    }
}

int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}

#ifdef TARGET_PPC64
off_t kvmppc_alloc_rma(void **rma)
{
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    *rma = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (*rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    return size;
}

uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    if (cap_ppc_rma >= 2) {
        return current_size;
    }

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
    rampagesize = getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }
    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
1990 #endif
1991
1992 bool kvmppc_spapr_use_multitce(void)
1993 {
1994 return cap_spapr_multitce;
1995 }
1996
1997 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
1998 bool vfio_accel)
1999 {
2000 struct kvm_create_spapr_tce args = {
2001 .liobn = liobn,
2002 .window_size = window_size,
2003 };
2004 long len;
2005 int fd;
2006 void *table;
2007
2008 /* Must set fd to -1 so we don't try to munmap when called for
2009 * destroying the table, which the upper layers -will- do
2010 */
2011 *pfd = -1;
2012 if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) {
2013 return NULL;
2014 }
2015
2016 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2017 if (fd < 0) {
2018 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2019 liobn);
2020 return NULL;
2021 }
2022
2023 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2024 /* FIXME: round this up to page size */
2025
2026 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2027 if (table == MAP_FAILED) {
2028 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2029 liobn);
2030 close(fd);
2031 return NULL;
2032 }
2033
2034 *pfd = fd;
2035 return table;
2036 }
2037
2038 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2039 {
2040 long len;
2041
2042 if (fd < 0) {
2043 return -1;
2044 }
2045
2046 len = nb_table * sizeof(uint64_t);
2047 if ((munmap(table, len) < 0) ||
2048 (close(fd) < 0)) {
2049 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
2050 strerror(errno));
2051 /* Leak the table */
2052 }
2053
2054 return 0;
2055 }
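/*
 * Lifecycle sketch pairing the two helpers above (hypothetical caller;
 * the sPAPR IOMMU code is the real user):
 *
 *     int fd;
 *     void *table = kvmppc_create_spapr_tce(liobn, window_size, &fd, false);
 *     if (!table) {
 *         ... allocate and manage the TCE table in QEMU instead ...
 *     }
 *     ...
 *     kvmppc_remove_spapr_tce(table, fd, window_size / SPAPR_TCE_PAGE_SIZE);
 */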
2056
2057 int kvmppc_reset_htab(int shift_hint)
2058 {
2059 uint32_t shift = shift_hint;
2060
2061 if (!kvm_enabled()) {
2062 /* Full emulation, tell caller to allocate htab itself */
2063 return 0;
2064 }
2065 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2066 int ret;
2067 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2068 if (ret == -ENOTTY) {
2069 /* At least some versions of PR KVM advertise the
2070 * capability, but don't implement the ioctl(). Oops.
2071 * Return 0 so that we allocate the htab in qemu, as is
2072 * correct for PR. */
2073 return 0;
2074 } else if (ret < 0) {
2075 return ret;
2076 }
2077 return shift;
2078 }
2079
2080 /* We have a kernel that predates the htab reset calls.  For PR
2081 * KVM, we need to allocate the htab ourselves; an HV KVM of
2082 * this era will have allocated a 16MB fixed-size hash table
2083 * already.  Kernels of this era expose the GET_PVINFO capability
2084 * only on PR, so we use that quirk to determine the right
2085 * answer. */
2086 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2087 /* PR - tell caller to allocate htab */
2088 return 0;
2089 } else {
2090 /* HV - assume 16MB kernel allocated htab */
2091 return 24;
2092 }
2093 }
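/*
 * Return-value examples for the function above: a caller passing
 * shift_hint == 24 may get back 24 (the kernel allocated a
 * 1 << 24 == 16MB htab), a different shift if the kernel chose another
 * size, 0 if QEMU must allocate the htab itself (TCG or PR KVM), or a
 * negative errno on failure.
 */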
2094
2095 static inline uint32_t mfpvr(void)
2096 {
2097 uint32_t pvr;
2098
2099 asm ("mfpvr %0"
2100 : "=r"(pvr));
2101 return pvr;
2102 }
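/*
 * The PVR (Processor Version Register, SPR 287) encodes the processor
 * version in its upper 16 bits and the revision level in the lower 16;
 * ppc_cpu_class_by_pvr() and ppc_cpu_class_by_pvr_mask() below match
 * CPU classes against this value.
 */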
2103
2104 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2105 {
2106 if (on) {
2107 *word |= flags;
2108 } else {
2109 *word &= ~flags;
2110 }
2111 }
2112
2113 static void kvmppc_host_cpu_initfn(Object *obj)
2114 {
2115 assert(kvm_enabled());
2116 }
2117
2118 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2119 {
2120 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2121 uint32_t vmx = kvmppc_get_vmx();
2122 uint32_t dfp = kvmppc_get_dfp();
2123 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2124 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2125
2126 /* Now fix up the class with information we can query from the host */
2127 pcc->pvr = mfpvr();
2128
2129 if (vmx != -1) {
2130 /* Only override when we know what the host supports */
2131 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2132 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2133 }
2134 if (dfp != -1) {
2135 /* Only override when we know what the host supports */
2136 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2137 }
2138
2139 if (dcache_size != -1) {
2140 pcc->l1_dcache_size = dcache_size;
2141 }
2142
2143 if (icache_size != -1) {
2144 pcc->l1_icache_size = icache_size;
2145 }
2146 }
2147
2148 bool kvmppc_has_cap_epr(void)
2149 {
2150 return cap_epr;
2151 }
2152
2153 bool kvmppc_has_cap_htab_fd(void)
2154 {
2155 return cap_htab_fd;
2156 }
2157
2158 bool kvmppc_has_cap_fixup_hcalls(void)
2159 {
2160 return cap_fixup_hcalls;
2161 }
2162
2163 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2164 {
2165 ObjectClass *oc = OBJECT_CLASS(pcc);
2166
2167 while (oc && !object_class_is_abstract(oc)) {
2168 oc = object_class_get_parent(oc);
2169 }
2170 assert(oc);
2171
2172 return POWERPC_CPU_CLASS(oc);
2173 }
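/*
 * Illustration (class names are indicative only): starting from a
 * concrete class such as "POWER7_v2.3", the loop above walks up the
 * parents until it reaches the first abstract ancestor, i.e. the
 * POWER7 family class, which is what gets registered as the generic
 * alias below.
 */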
2174
2175 static int kvm_ppc_register_host_cpu_type(void)
2176 {
2177 TypeInfo type_info = {
2178 .name = TYPE_HOST_POWERPC_CPU,
2179 .instance_init = kvmppc_host_cpu_initfn,
2180 .class_init = kvmppc_host_cpu_class_init,
2181 };
2182 uint32_t host_pvr = mfpvr();
2183 PowerPCCPUClass *pvr_pcc;
2184 DeviceClass *dc;
2185
2186 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2187 if (pvr_pcc == NULL) {
2188 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2189 }
2190 if (pvr_pcc == NULL) {
2191 return -1;
2192 }
2193 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2194 type_register(&type_info);
2195
2196 /* Register a generic CPU class for the whole family */
2197 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2198 dc = DEVICE_CLASS(pvr_pcc);
2199 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2200 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2201 type_register(&type_info);
2202
2203 return 0;
2204 }
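/*
 * Illustration: on a POWER8 host, mfpvr() matches a POWER8 class, so
 * TYPE_HOST_POWERPC_CPU is registered as a subtype of it, plus a family
 * alias built from dc->desc, e.g. "POWER8-" TYPE_POWERPC_CPU (the exact
 * names depend on the host class's description string).
 */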
2205
2206 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2207 {
2208 struct kvm_rtas_token_args args = {
2209 .token = token,
2210 };
2211
2212 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2213 return -ENOENT;
2214 }
2215
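    /* A bounded strncpy() is used here deliberately: appending with
     * strncat(..., sizeof(args.name)) could write past the end of
     * args.name, which is the overflow this code avoids. */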
2216 strncpy(args.name, function, sizeof(args.name));
2217
2218 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2219 }
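/*
 * Hypothetical usage from machine setup: route the "ibm,set-xive" RTAS
 * call to the in-kernel handler, keeping the QEMU implementation as a
 * fallback:
 *
 *     if (kvmppc_define_rtas_kernel_token(token, "ibm,set-xive") < 0) {
 *         ... kernel cannot handle it; service the call in QEMU ...
 *     }
 */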
2220
2221 int kvmppc_get_htab_fd(bool write)
2222 {
2223 struct kvm_get_htab_fd s = {
2224 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2225 .start_index = 0,
2226 };
2227
2228 if (!cap_htab_fd) {
2229 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2230 return -1;
2231 }
2232
2233 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2234 }
2235
2236 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2237 {
2238 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2239 uint8_t buf[bufsize];
2240 ssize_t rc;
2241
2242 do {
2243 rc = read(fd, buf, bufsize);
2244 if (rc < 0) {
2245 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2246 strerror(errno));
2247 return rc;
2248 } else if (rc) {
2249 /* Kernel already returns data in BE format for the file */
2250 qemu_put_buffer(f, buf, rc);
2251 }
2252 } while ((rc != 0)
2253 && ((max_ns < 0)
2254 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2255
2256 return (rc == 0) ? 1 : 0;
2257 }
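/*
 * Stream format note (kernel ABI of the htab fd, as read above and
 * written back by kvmppc_load_htab_chunk() below): the data is a
 * sequence of struct kvm_get_htab_header records, each followed by
 * n_valid HPTEs of HASH_PTE_SIZE_64 bytes; read() returning 0 means the
 * whole table has been transferred, hence the (rc == 0) ? 1 : 0 above.
 */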
2258
2259 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2260 uint16_t n_valid, uint16_t n_invalid)
2261 {
2262 struct kvm_get_htab_header *buf;
2263 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2264 ssize_t rc;
2265
2266 buf = alloca(chunksize);
2267 /* This is KVM on ppc, so this is all big-endian */
2268 buf->index = index;
2269 buf->n_valid = n_valid;
2270 buf->n_invalid = n_invalid;
2271
2272 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2273
2274 rc = write(fd, buf, chunksize);
2275 if (rc < 0) {
2276 fprintf(stderr, "Error writing KVM hash table: %s\n",
2277 strerror(errno));
2278 return rc;
2279 }
2280 if (rc != chunksize) {
2281 /* We should never get a short write on a single chunk */
2282 fprintf(stderr, "Short write, restoring KVM hash table\n");
2283 return -1;
2284 }
2285 return 0;
2286 }
2287
2288 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2289 {
2290 return true;
2291 }
2292
2293 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2294 {
2295 return 1;
2296 }
2297
2298 int kvm_arch_on_sigbus(int code, void *addr)
2299 {
2300 return 1;
2301 }
2302
2303 void kvm_arch_init_irq_routing(KVMState *s)
2304 {
2305 }
2306
2307 struct kvm_get_htab_buf {
2308 struct kvm_get_htab_header header;
2309 /*
2310 * We require one extra entry for the read
2311 */
2312 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2313 };
2314
2315 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2316 {
2317 int htab_fd;
2318 struct kvm_get_htab_fd ghf;
2319 struct kvm_get_htab_buf *hpte_buf;
2320
2321 ghf.flags = 0;
2322 ghf.start_index = pte_index;
2323 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2324 if (htab_fd < 0) {
2325 goto error_out;
2326 }
2327
2328 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2329 /*
2330 * Read the hpte group
2331 */
2332 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2333 goto out_close;
2334 }
2335
2336 close(htab_fd);
2337 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2338
2339 out_close:
2340 g_free(hpte_buf);
2341 close(htab_fd);
2342 error_out:
2343 return 0;
2344 }
2345
2346 void kvmppc_hash64_free_pteg(uint64_t token)
2347 {
2348 struct kvm_get_htab_buf *htab_buf;
2349
2350 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2351 hpte);
2352 g_free(htab_buf);
2353 return;
2354 }
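/*
 * The token returned by kvmppc_hash64_read_pteg() is really the address
 * of hpte_buf->hpte inside a heap allocation, and container_of() above
 * recovers that allocation. A typical (sketched) caller pairs the two:
 *
 *     uint64_t token = kvmppc_hash64_read_pteg(cpu, pte_index);
 *     if (token) {
 *         target_ulong pte0 = ((target_ulong *)(uintptr_t)token)[0];
 *         ...
 *         kvmppc_hash64_free_pteg(token);
 *     }
 */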
2355
2356 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2357 target_ulong pte0, target_ulong pte1)
2358 {
2359 int htab_fd;
2360 struct kvm_get_htab_fd ghf;
2361 struct kvm_get_htab_buf hpte_buf;
2362
2363 ghf.flags = 0;
2364 ghf.start_index = 0; /* Ignored */
2365 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2366 if (htab_fd < 0) {
2367 goto error_out;
2368 }
2369
2370 hpte_buf.header.n_valid = 1;
2371 hpte_buf.header.n_invalid = 0;
2372 hpte_buf.header.index = pte_index;
2373 hpte_buf.hpte[0] = pte0;
2374 hpte_buf.hpte[1] = pte1;
2375 /*
2376 * Write the hpte entry.
2377 * CAUTION: write() has the warn_unused_result attribute, so we
2378 * must check the return value even though we can do nothing on failure.
2379 */
2380 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2381 goto out_close;
2382 }
2383
2384 out_close:
2385 close(htab_fd);
2386 return;
2387
2388 error_out:
2389 return;
2390 }