1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21
22 #include <linux/kvm.h>
23
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/kvm.h"
30 #include "sysemu/numa.h"
31 #include "kvm_ppc.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
35
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "sysemu/hostmem.h"
46 #include "qemu/cutils.h"
47 #if defined(TARGET_PPC64)
48 #include "hw/ppc/spapr_cpu_core.h"
49 #endif
50
51 //#define DEBUG_KVM
52
53 #ifdef DEBUG_KVM
54 #define DPRINTF(fmt, ...) \
55 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
56 #else
57 #define DPRINTF(fmt, ...) \
58 do { } while (0)
59 #endif
60
61 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
62
63 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
64 KVM_CAP_LAST_INFO
65 };
66
67 static int cap_interrupt_unset = false;
68 static int cap_interrupt_level = false;
69 static int cap_segstate;
70 static int cap_booke_sregs;
71 static int cap_ppc_smt;
72 static int cap_ppc_rma;
73 static int cap_spapr_tce;
74 static int cap_spapr_multitce;
75 static int cap_spapr_vfio;
76 static int cap_hior;
77 static int cap_one_reg;
78 static int cap_epr;
79 static int cap_ppc_watchdog;
80 static int cap_papr;
81 static int cap_htab_fd;
82 static int cap_fixup_hcalls;
83
84 static uint32_t debug_inst_opcode;
85
86 /* XXX We have a race condition where we actually have a level triggered
87 * interrupt, but the infrastructure can't expose that yet, so the guest
88 * takes but ignores it, goes to sleep and never gets notified that there's
89 * still an interrupt pending.
90 *
91 * As a quick workaround, let's just wake up again 20 ms after we injected
92  * an interrupt. That way we can ensure that we're always reinjecting
93 * interrupts in case the guest swallowed them.
94 */
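/* (The timer below is created in kvm_arch_init_vcpu() and re-armed in
 * kvm_arch_pre_run() after each injected interrupt; its callback,
 * kvm_kick_cpu(), simply kicks the vCPU awake again.) */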
95 static QEMUTimer *idle_timer;
96
97 static void kvm_kick_cpu(void *opaque)
98 {
99 PowerPCCPU *cpu = opaque;
100
101 qemu_cpu_kick(CPU(cpu));
102 }
103
104 static int kvm_ppc_register_host_cpu_type(void);
105
106 int kvm_arch_init(MachineState *ms, KVMState *s)
107 {
108 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
109 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
110 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
111 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
112 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
113 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
114 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
115 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
116 cap_spapr_vfio = false;
117 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
118 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
119 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
120 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
121 /* Note: we don't set cap_papr here, because this capability is
122      * only activated later by kvmppc_set_papr() */
123 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
124 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
125
126 if (!cap_interrupt_level) {
127 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
128 "VM to stall at times!\n");
129 }
130
131 kvm_ppc_register_host_cpu_type();
132
133 return 0;
134 }
135
136 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
137 {
138 CPUPPCState *cenv = &cpu->env;
139 CPUState *cs = CPU(cpu);
140 struct kvm_sregs sregs;
141 int ret;
142
143 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
144 /* What we're really trying to say is "if we're on BookE, we use
145 the native PVR for now". This is the only sane way to check
146                it though, so we may mislead users into thinking they can run
147 BookE guests on BookS. Let's hope nobody dares enough :) */
148 return 0;
149 } else {
150 if (!cap_segstate) {
151 fprintf(stderr, "kvm error: missing PVR setting capability\n");
152 return -ENOSYS;
153 }
154 }
155
156 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
157 if (ret) {
158 return ret;
159 }
160
161 sregs.pvr = cenv->spr[SPR_PVR];
162 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
163 }
164
165 /* Set up a shared TLB array with KVM */
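/* The guest TLB contents live in QEMU's env->tlb.tlbm array, which is
 * registered with KVM through KVM_CAP_SW_TLB below; modified entries are
 * pushed back to KVM with KVM_DIRTY_TLB in kvm_sw_tlb_put(). */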
166 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
167 {
168 CPUPPCState *env = &cpu->env;
169 CPUState *cs = CPU(cpu);
170 struct kvm_book3e_206_tlb_params params = {};
171 struct kvm_config_tlb cfg = {};
172 unsigned int entries = 0;
173 int ret, i;
174
175 if (!kvm_enabled() ||
176 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
177 return 0;
178 }
179
180 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
181
182 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
183 params.tlb_sizes[i] = booke206_tlb_size(env, i);
184 params.tlb_ways[i] = booke206_tlb_ways(env, i);
185 entries += params.tlb_sizes[i];
186 }
187
188 assert(entries == env->nb_tlb);
189 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
190
191 env->tlb_dirty = true;
192
193 cfg.array = (uintptr_t)env->tlb.tlbm;
194 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
195 cfg.params = (uintptr_t)&params;
196 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
197
198 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
199 if (ret < 0) {
200 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
201 __func__, strerror(-ret));
202 return ret;
203 }
204
205 env->kvm_sw_tlb = true;
206 return 0;
207 }
208
209
210 #if defined(TARGET_PPC64)
211 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
212 struct kvm_ppc_smmu_info *info)
213 {
214 CPUPPCState *env = &cpu->env;
215 CPUState *cs = CPU(cpu);
216
217 memset(info, 0, sizeof(*info));
218
219 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
220      * we need to "guess" what the supported page sizes are.
221 *
222 * For that to work we make a few assumptions:
223 *
224 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
225 * KVM which only supports 4K and 16M pages, but supports them
226      * regardless of the backing store characteristics. We also don't
227 * support 1T segments.
228 *
229 * This is safe as if HV KVM ever supports that capability or PR
230      * KVM grows support for more page/segment sizes, those versions
231 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
232 * will not hit this fallback
233 *
234 * - Else we are running HV KVM. This means we only support page
235 * sizes that fit in the backing store. Additionally we only
236      * advertise 64K pages if the processor is ARCH 2.06 and we assume
237 * P7 encodings for the SLB and hash table. Here too, we assume
238 * support for any newer processor will mean a kernel that
239 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
240 * this fallback.
241 */
242 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
243 /* No flags */
244 info->flags = 0;
245 info->slb_size = 64;
246
247 /* Standard 4k base page size segment */
248 info->sps[0].page_shift = 12;
249 info->sps[0].slb_enc = 0;
250 info->sps[0].enc[0].page_shift = 12;
251 info->sps[0].enc[0].pte_enc = 0;
252
253 /* Standard 16M large page size segment */
254 info->sps[1].page_shift = 24;
255 info->sps[1].slb_enc = SLB_VSID_L;
256 info->sps[1].enc[0].page_shift = 24;
257 info->sps[1].enc[0].pte_enc = 0;
258 } else {
259 int i = 0;
260
261 /* HV KVM has backing store size restrictions */
262 info->flags = KVM_PPC_PAGE_SIZES_REAL;
263
264 if (env->mmu_model & POWERPC_MMU_1TSEG) {
265 info->flags |= KVM_PPC_1T_SEGMENTS;
266 }
267
268 if (env->mmu_model == POWERPC_MMU_2_06 ||
269 env->mmu_model == POWERPC_MMU_2_07) {
270 info->slb_size = 32;
271 } else {
272 info->slb_size = 64;
273 }
274
275 /* Standard 4k base page size segment */
276 info->sps[i].page_shift = 12;
277 info->sps[i].slb_enc = 0;
278 info->sps[i].enc[0].page_shift = 12;
279 info->sps[i].enc[0].pte_enc = 0;
280 i++;
281
282 /* 64K on MMU 2.06 and later */
283 if (env->mmu_model == POWERPC_MMU_2_06 ||
284 env->mmu_model == POWERPC_MMU_2_07) {
285 info->sps[i].page_shift = 16;
286 info->sps[i].slb_enc = 0x110;
287 info->sps[i].enc[0].page_shift = 16;
288 info->sps[i].enc[0].pte_enc = 1;
289 i++;
290 }
291
292 /* Standard 16M large page size segment */
293 info->sps[i].page_shift = 24;
294 info->sps[i].slb_enc = SLB_VSID_L;
295 info->sps[i].enc[0].page_shift = 24;
296 info->sps[i].enc[0].pte_enc = 0;
297 }
298 }
299
300 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
301 {
302 CPUState *cs = CPU(cpu);
303 int ret;
304
305 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
306 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
307 if (ret == 0) {
308 return;
309 }
310 }
311
312 kvm_get_fallback_smmu_info(cpu, info);
313 }
314
315 static long gethugepagesize(const char *mem_path)
316 {
317 struct statfs fs;
318 int ret;
319
320 do {
321 ret = statfs(mem_path, &fs);
322 } while (ret != 0 && errno == EINTR);
323
324 if (ret != 0) {
325 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
326 strerror(errno));
327 exit(1);
328 }
329
330 #define HUGETLBFS_MAGIC 0x958458f6
331
332 if (fs.f_type != HUGETLBFS_MAGIC) {
333 /* Explicit mempath, but it's ordinary pages */
334 return getpagesize();
335 }
336
337     /* It's hugetlbfs, so return the huge page size */
338 return fs.f_bsize;
339 }
340
341 /*
342 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
343 * may or may not name the same files / on the same filesystem now as
344 * when we actually open and map them. Iterate over the file
345 * descriptors instead, and use qemu_fd_getpagesize().
346 */
347 static int find_max_supported_pagesize(Object *obj, void *opaque)
348 {
349 char *mem_path;
350 long *hpsize_min = opaque;
351
352 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
353 mem_path = object_property_get_str(obj, "mem-path", NULL);
354 if (mem_path) {
355 long hpsize = gethugepagesize(mem_path);
356 if (hpsize < *hpsize_min) {
357 *hpsize_min = hpsize;
358 }
359 } else {
360 *hpsize_min = getpagesize();
361 }
362 }
363
364 return 0;
365 }
366
367 static long getrampagesize(void)
368 {
369 long hpsize = LONG_MAX;
370 long mainrampagesize;
371 Object *memdev_root;
372
373 if (mem_path) {
374 mainrampagesize = gethugepagesize(mem_path);
375 } else {
376 mainrampagesize = getpagesize();
377 }
378
379 /* it's possible we have memory-backend objects with
380 * hugepage-backed RAM. these may get mapped into system
381 * address space via -numa parameters or memory hotplug
382 * hooks. we want to take these into account, but we
383 * also want to make sure these supported hugepage
384 * sizes are applicable across the entire range of memory
385 * we may boot from, so we take the min across all
386 * backends, and assume normal pages in cases where a
387 * backend isn't backed by hugepages.
388 */
389 memdev_root = object_resolve_path("/objects", NULL);
390 if (memdev_root) {
391 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
392 }
393 if (hpsize == LONG_MAX) {
394 /* No additional memory regions found ==> Report main RAM page size */
395 return mainrampagesize;
396 }
397
398 /* If NUMA is disabled or the NUMA nodes are not backed with a
399 * memory-backend, then there is at least one node using "normal" RAM,
400 * so if its page size is smaller we have got to report that size instead.
401 */
402 if (hpsize > mainrampagesize &&
403 (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
404 static bool warned;
405 if (!warned) {
406 error_report("Huge page support disabled (n/a for main memory).");
407 warned = true;
408 }
409 return mainrampagesize;
410 }
411
412 return hpsize;
413 }
414
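/* When KVM_PPC_PAGE_SIZES_REAL is set (HV KVM), a page size is only usable
 * if guest RAM is backed by host pages at least that large; without the
 * flag (PR KVM) any advertised page size is accepted. */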
415 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
416 {
417 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
418 return true;
419 }
420
421 return (1ul << shift) <= rampgsize;
422 }
423
424 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
425 {
426 static struct kvm_ppc_smmu_info smmu_info;
427 static bool has_smmu_info;
428 CPUPPCState *env = &cpu->env;
429 long rampagesize;
430 int iq, ik, jq, jk;
431 bool has_64k_pages = false;
432
433 /* We only handle page sizes for 64-bit server guests for now */
434 if (!(env->mmu_model & POWERPC_MMU_64)) {
435 return;
436 }
437
438 /* Collect MMU info from kernel if not already */
439 if (!has_smmu_info) {
440 kvm_get_smmu_info(cpu, &smmu_info);
441 has_smmu_info = true;
442 }
443
444 rampagesize = getrampagesize();
445
446 /* Convert to QEMU form */
447 memset(&env->sps, 0, sizeof(env->sps));
448
449 /* If we have HV KVM, we need to forbid CI large pages if our
450 * host page size is smaller than 64K.
451 */
452 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
453 env->ci_large_pages = getpagesize() >= 0x10000;
454 }
455
456 /*
457 * XXX This loop should be an entry wide AND of the capabilities that
458 * the selected CPU has with the capabilities that KVM supports.
459 */
460 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
461 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
462 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
463
464 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
465 ksps->page_shift)) {
466 continue;
467 }
468 qsps->page_shift = ksps->page_shift;
469 qsps->slb_enc = ksps->slb_enc;
470 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
471 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
472 ksps->enc[jk].page_shift)) {
473 continue;
474 }
475 if (ksps->enc[jk].page_shift == 16) {
476 has_64k_pages = true;
477 }
478 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
479 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
480 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
481 break;
482 }
483 }
484 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
485 break;
486 }
487 }
488 env->slb_nr = smmu_info.slb_size;
489 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
490 env->mmu_model &= ~POWERPC_MMU_1TSEG;
491 }
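    /* If KVM did not report a usable 64K page encoding above (e.g. because
     * the backing RAM pages are smaller than 64K under HV KVM), mark 64K
     * page support as unavailable in the guest-visible MMU model. */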
492 if (!has_64k_pages) {
493 env->mmu_model &= ~POWERPC_MMU_64K;
494 }
495 }
496 #else /* defined (TARGET_PPC64) */
497
498 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
499 {
500 }
501
502 #endif /* !defined (TARGET_PPC64) */
503
504 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
505 {
506 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
507 }
508
509 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
510  * book3s supports only 1 watchpoint, so an array size
511 * of 4 is sufficient for now.
512 */
513 #define MAX_HW_BKPTS 4
514
515 static struct HWBreakpoint {
516 target_ulong addr;
517 int type;
518 } hw_debug_points[MAX_HW_BKPTS];
519
520 static CPUWatchpoint hw_watchpoint;
521
522 /* By default, no breakpoints or watchpoints are supported */
523 static int max_hw_breakpoint;
524 static int max_hw_watchpoint;
525 static int nb_hw_breakpoint;
526 static int nb_hw_watchpoint;
527
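/* Only BookE currently advertises hardware debug registers here
 * (2 breakpoints and 2 watchpoints); other models leave the limits at
 * zero. */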
528 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
529 {
530 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
531 max_hw_breakpoint = 2;
532 max_hw_watchpoint = 2;
533 }
534
535 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
536 fprintf(stderr, "Error initializing h/w breakpoints\n");
537 return;
538 }
539 }
540
541 int kvm_arch_init_vcpu(CPUState *cs)
542 {
543 PowerPCCPU *cpu = POWERPC_CPU(cs);
544 CPUPPCState *cenv = &cpu->env;
545 int ret;
546
547 /* Gather server mmu info from KVM and update the CPU state */
548 kvm_fixup_page_sizes(cpu);
549
550 /* Synchronize sregs with kvm */
551 ret = kvm_arch_sync_sregs(cpu);
552 if (ret) {
553 if (ret == -EINVAL) {
554 error_report("Register sync failed... If you're using kvm-hv.ko,"
555 " only \"-cpu host\" is possible");
556 }
557 return ret;
558 }
559
560 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
561
562 /* Some targets support access to KVM's guest TLB. */
563 switch (cenv->mmu_model) {
564 case POWERPC_MMU_BOOKE206:
565 ret = kvm_booke206_tlb_init(cpu);
566 break;
567 default:
568 break;
569 }
570
571 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
572 kvmppc_hw_debug_points_init(cenv);
573
574 return ret;
575 }
576
577 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
578 {
579 CPUPPCState *env = &cpu->env;
580 CPUState *cs = CPU(cpu);
581 struct kvm_dirty_tlb dirty_tlb;
582 unsigned char *bitmap;
583 int ret;
584
585 if (!env->kvm_sw_tlb) {
586 return;
587 }
588
589 bitmap = g_malloc((env->nb_tlb + 7) / 8);
590 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
591
592 dirty_tlb.bitmap = (uintptr_t)bitmap;
593 dirty_tlb.num_dirty = env->nb_tlb;
594
595 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
596 if (ret) {
597 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
598 __func__, strerror(-ret));
599 }
600
601 g_free(bitmap);
602 }
603
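/* Helpers for single-SPR access through the ONE_REG interface: the
 * KVM_REG_SIZE_MASK bits of the register id select whether the SPR is
 * transferred as a 32-bit or a 64-bit value. */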
604 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
605 {
606 PowerPCCPU *cpu = POWERPC_CPU(cs);
607 CPUPPCState *env = &cpu->env;
608 union {
609 uint32_t u32;
610 uint64_t u64;
611 } val;
612 struct kvm_one_reg reg = {
613 .id = id,
614 .addr = (uintptr_t) &val,
615 };
616 int ret;
617
618 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
619 if (ret != 0) {
620 trace_kvm_failed_spr_get(spr, strerror(errno));
621 } else {
622 switch (id & KVM_REG_SIZE_MASK) {
623 case KVM_REG_SIZE_U32:
624 env->spr[spr] = val.u32;
625 break;
626
627 case KVM_REG_SIZE_U64:
628 env->spr[spr] = val.u64;
629 break;
630
631 default:
632 /* Don't handle this size yet */
633 abort();
634 }
635 }
636 }
637
638 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
639 {
640 PowerPCCPU *cpu = POWERPC_CPU(cs);
641 CPUPPCState *env = &cpu->env;
642 union {
643 uint32_t u32;
644 uint64_t u64;
645 } val;
646 struct kvm_one_reg reg = {
647 .id = id,
648 .addr = (uintptr_t) &val,
649 };
650 int ret;
651
652 switch (id & KVM_REG_SIZE_MASK) {
653 case KVM_REG_SIZE_U32:
654 val.u32 = env->spr[spr];
655 break;
656
657 case KVM_REG_SIZE_U64:
658 val.u64 = env->spr[spr];
659 break;
660
661 default:
662 /* Don't handle this size yet */
663 abort();
664 }
665
666 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
667 if (ret != 0) {
668 trace_kvm_failed_spr_set(spr, strerror(errno));
669 }
670 }
671
672 static int kvm_put_fp(CPUState *cs)
673 {
674 PowerPCCPU *cpu = POWERPC_CPU(cs);
675 CPUPPCState *env = &cpu->env;
676 struct kvm_one_reg reg;
677 int i;
678 int ret;
679
680 if (env->insns_flags & PPC_FLOAT) {
681 uint64_t fpscr = env->fpscr;
682 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
683
684 reg.id = KVM_REG_PPC_FPSCR;
685 reg.addr = (uintptr_t)&fpscr;
686 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
687 if (ret < 0) {
688 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
689 return ret;
690 }
691
692 for (i = 0; i < 32; i++) {
693 uint64_t vsr[2];
694
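            /* The 128-bit VSX register is passed to KVM as two 64-bit
             * halves; the FPR part and the extended VSR part swap places
             * depending on host endianness (see the #ifdef below). */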
695 #ifdef HOST_WORDS_BIGENDIAN
696 vsr[0] = float64_val(env->fpr[i]);
697 vsr[1] = env->vsr[i];
698 #else
699 vsr[0] = env->vsr[i];
700 vsr[1] = float64_val(env->fpr[i]);
701 #endif
702 reg.addr = (uintptr_t) &vsr;
703 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
704
705 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
706 if (ret < 0) {
707 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
708 i, strerror(errno));
709 return ret;
710 }
711 }
712 }
713
714 if (env->insns_flags & PPC_ALTIVEC) {
715 reg.id = KVM_REG_PPC_VSCR;
716 reg.addr = (uintptr_t)&env->vscr;
717 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
718 if (ret < 0) {
719 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
720 return ret;
721 }
722
723 for (i = 0; i < 32; i++) {
724 reg.id = KVM_REG_PPC_VR(i);
725 reg.addr = (uintptr_t)&env->avr[i];
726 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
727 if (ret < 0) {
728 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
729 return ret;
730 }
731 }
732 }
733
734 return 0;
735 }
736
737 static int kvm_get_fp(CPUState *cs)
738 {
739 PowerPCCPU *cpu = POWERPC_CPU(cs);
740 CPUPPCState *env = &cpu->env;
741 struct kvm_one_reg reg;
742 int i;
743 int ret;
744
745 if (env->insns_flags & PPC_FLOAT) {
746 uint64_t fpscr;
747 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
748
749 reg.id = KVM_REG_PPC_FPSCR;
750 reg.addr = (uintptr_t)&fpscr;
751 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
752 if (ret < 0) {
753 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
754 return ret;
755 } else {
756 env->fpscr = fpscr;
757 }
758
759 for (i = 0; i < 32; i++) {
760 uint64_t vsr[2];
761
762 reg.addr = (uintptr_t) &vsr;
763 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
764
765 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
766 if (ret < 0) {
767 DPRINTF("Unable to get %s%d from KVM: %s\n",
768 vsx ? "VSR" : "FPR", i, strerror(errno));
769 return ret;
770 } else {
771 #ifdef HOST_WORDS_BIGENDIAN
772 env->fpr[i] = vsr[0];
773 if (vsx) {
774 env->vsr[i] = vsr[1];
775 }
776 #else
777 env->fpr[i] = vsr[1];
778 if (vsx) {
779 env->vsr[i] = vsr[0];
780 }
781 #endif
782 }
783 }
784 }
785
786 if (env->insns_flags & PPC_ALTIVEC) {
787 reg.id = KVM_REG_PPC_VSCR;
788 reg.addr = (uintptr_t)&env->vscr;
789 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
790 if (ret < 0) {
791 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
792 return ret;
793 }
794
795 for (i = 0; i < 32; i++) {
796 reg.id = KVM_REG_PPC_VR(i);
797 reg.addr = (uintptr_t)&env->avr[i];
798 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
799 if (ret < 0) {
800 DPRINTF("Unable to get VR%d from KVM: %s\n",
801 i, strerror(errno));
802 return ret;
803 }
804 }
805 }
806
807 return 0;
808 }
809
810 #if defined(TARGET_PPC64)
811 static int kvm_get_vpa(CPUState *cs)
812 {
813 PowerPCCPU *cpu = POWERPC_CPU(cs);
814 CPUPPCState *env = &cpu->env;
815 struct kvm_one_reg reg;
816 int ret;
817
818 reg.id = KVM_REG_PPC_VPA_ADDR;
819 reg.addr = (uintptr_t)&env->vpa_addr;
820 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
821 if (ret < 0) {
822 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
823 return ret;
824 }
825
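    /* KVM_REG_PPC_VPA_SLB (and _DTL below) transfer an address/length pair
     * in a single access, hence the asserts that the size field directly
     * follows the address field in CPUPPCState. */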
826 assert((uintptr_t)&env->slb_shadow_size
827 == ((uintptr_t)&env->slb_shadow_addr + 8));
828 reg.id = KVM_REG_PPC_VPA_SLB;
829 reg.addr = (uintptr_t)&env->slb_shadow_addr;
830 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
831 if (ret < 0) {
832 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
833 strerror(errno));
834 return ret;
835 }
836
837 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
838 reg.id = KVM_REG_PPC_VPA_DTL;
839 reg.addr = (uintptr_t)&env->dtl_addr;
840 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
841 if (ret < 0) {
842 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
843 strerror(errno));
844 return ret;
845 }
846
847 return 0;
848 }
849
850 static int kvm_put_vpa(CPUState *cs)
851 {
852 PowerPCCPU *cpu = POWERPC_CPU(cs);
853 CPUPPCState *env = &cpu->env;
854 struct kvm_one_reg reg;
855 int ret;
856
857 /* SLB shadow or DTL can't be registered unless a master VPA is
858 * registered. That means when restoring state, if a VPA *is*
859 * registered, we need to set that up first. If not, we need to
860 * deregister the others before deregistering the master VPA */
861 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
862
863 if (env->vpa_addr) {
864 reg.id = KVM_REG_PPC_VPA_ADDR;
865 reg.addr = (uintptr_t)&env->vpa_addr;
866 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
867 if (ret < 0) {
868 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
869 return ret;
870 }
871 }
872
873 assert((uintptr_t)&env->slb_shadow_size
874 == ((uintptr_t)&env->slb_shadow_addr + 8));
875 reg.id = KVM_REG_PPC_VPA_SLB;
876 reg.addr = (uintptr_t)&env->slb_shadow_addr;
877 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
878 if (ret < 0) {
879 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
880 return ret;
881 }
882
883 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
884 reg.id = KVM_REG_PPC_VPA_DTL;
885 reg.addr = (uintptr_t)&env->dtl_addr;
886 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
887 if (ret < 0) {
888 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
889 strerror(errno));
890 return ret;
891 }
892
893 if (!env->vpa_addr) {
894 reg.id = KVM_REG_PPC_VPA_ADDR;
895 reg.addr = (uintptr_t)&env->vpa_addr;
896 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
897 if (ret < 0) {
898 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
899 return ret;
900 }
901 }
902
903 return 0;
904 }
905 #endif /* TARGET_PPC64 */
906
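/* Push the Book3S segment state (SDR1, the SLB, segment registers and
 * BATs) from the QEMU-side CPU state into KVM via KVM_SET_SREGS. */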
907 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
908 {
909 CPUPPCState *env = &cpu->env;
910 struct kvm_sregs sregs;
911 int i;
912
913 sregs.pvr = env->spr[SPR_PVR];
914
915 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
916
917 /* Sync SLB */
918 #ifdef TARGET_PPC64
919 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
920 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
921 if (env->slb[i].esid & SLB_ESID_V) {
922 sregs.u.s.ppc64.slb[i].slbe |= i;
923 }
924 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
925 }
926 #endif
927
928 /* Sync SRs */
929 for (i = 0; i < 16; i++) {
930 sregs.u.s.ppc32.sr[i] = env->sr[i];
931 }
932
933 /* Sync BATs */
934 for (i = 0; i < 8; i++) {
935 /* Beware. We have to swap upper and lower bits here */
936 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
937 | env->DBAT[1][i];
938 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
939 | env->IBAT[1][i];
940 }
941
942 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
943 }
944
945 int kvm_arch_put_registers(CPUState *cs, int level)
946 {
947 PowerPCCPU *cpu = POWERPC_CPU(cs);
948 CPUPPCState *env = &cpu->env;
949 struct kvm_regs regs;
950 int ret;
951 int i;
952
953 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
954 if (ret < 0) {
955 return ret;
956 }
957
958 regs.ctr = env->ctr;
959 regs.lr = env->lr;
960 regs.xer = cpu_read_xer(env);
961 regs.msr = env->msr;
962 regs.pc = env->nip;
963
964 regs.srr0 = env->spr[SPR_SRR0];
965 regs.srr1 = env->spr[SPR_SRR1];
966
967 regs.sprg0 = env->spr[SPR_SPRG0];
968 regs.sprg1 = env->spr[SPR_SPRG1];
969 regs.sprg2 = env->spr[SPR_SPRG2];
970 regs.sprg3 = env->spr[SPR_SPRG3];
971 regs.sprg4 = env->spr[SPR_SPRG4];
972 regs.sprg5 = env->spr[SPR_SPRG5];
973 regs.sprg6 = env->spr[SPR_SPRG6];
974 regs.sprg7 = env->spr[SPR_SPRG7];
975
976 regs.pid = env->spr[SPR_BOOKE_PID];
977
978 for (i = 0;i < 32; i++)
979 regs.gpr[i] = env->gpr[i];
980
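    /* Pack the eight 4-bit condition register fields into the single
     * 32-bit CR value expected by KVM, with CR0 in the top nibble. */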
981 regs.cr = 0;
982 for (i = 0; i < 8; i++) {
983 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
984 }
985
986 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
987 if (ret < 0)
988 return ret;
989
990 kvm_put_fp(cs);
991
992 if (env->tlb_dirty) {
993 kvm_sw_tlb_put(cpu);
994 env->tlb_dirty = false;
995 }
996
997 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
998 ret = kvmppc_put_books_sregs(cpu);
999 if (ret < 0) {
1000 return ret;
1001 }
1002 }
1003
1004 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1005 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1006 }
1007
1008 if (cap_one_reg) {
1009 int i;
1010
1011         /* We deliberately ignore errors here: for kernels which have
1012          * the ONE_REG calls but don't support the specific
1013 * registers, there's a reasonable chance things will still
1014 * work, at least until we try to migrate. */
1015 for (i = 0; i < 1024; i++) {
1016 uint64_t id = env->spr_cb[i].one_reg_id;
1017
1018 if (id != 0) {
1019 kvm_put_one_spr(cs, id, i);
1020 }
1021 }
1022
1023 #ifdef TARGET_PPC64
1024 if (msr_ts) {
1025 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1026 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1027 }
1028 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1029 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1030 }
1031 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1032 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1033 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1034 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1035 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1036 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1037 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1038 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1039 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1040 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1041 }
1042
1043 if (cap_papr) {
1044 if (kvm_put_vpa(cs) < 0) {
1045 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1046 }
1047 }
1048
1049 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1050 #endif /* TARGET_PPC64 */
1051 }
1052
1053 return ret;
1054 }
1055
1056 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1057 {
1058 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1059 }
1060
1061 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1062 {
1063 CPUPPCState *env = &cpu->env;
1064 struct kvm_sregs sregs;
1065 int ret;
1066
1067 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1068 if (ret < 0) {
1069 return ret;
1070 }
1071
1072 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1073 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1074 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1075 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1076 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1077 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1078 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1079 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1080 env->spr[SPR_DECR] = sregs.u.e.dec;
1081 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1082 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1083 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1084 }
1085
1086 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1087 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1088 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1089 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1090 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1091 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1092 }
1093
1094 if (sregs.u.e.features & KVM_SREGS_E_64) {
1095 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1096 }
1097
1098 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1099 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1100 }
1101
1102 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1103 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1104 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1105 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1106 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1107 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1108 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1109 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1110 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1111 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1112 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1113 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1114 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1115 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1116 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1117 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1118 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1119 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1120 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1121 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1122 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1123 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1124 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1125 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1126 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1127 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1128 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1129 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1130 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1131 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1132 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1133 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1134 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1135
1136 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1137 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1138 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1139 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1140 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1141 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1142 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1143 }
1144
1145 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1146 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1147 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1148 }
1149
1150 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1151 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1152 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1153 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1154 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1155 }
1156 }
1157
1158 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1159 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1160 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1161 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1162 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1163 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1164 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1165 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1166 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1167 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1168 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1169 }
1170
1171 if (sregs.u.e.features & KVM_SREGS_EXP) {
1172 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1173 }
1174
1175 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1176 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1177 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1178 }
1179
1180 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1181 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1182 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1183 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1184
1185 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1186 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1187 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1188 }
1189 }
1190
1191 return 0;
1192 }
1193
1194 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1195 {
1196 CPUPPCState *env = &cpu->env;
1197 struct kvm_sregs sregs;
1198 int ret;
1199 int i;
1200
1201 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1202 if (ret < 0) {
1203 return ret;
1204 }
1205
1206 if (!env->external_htab) {
1207 ppc_store_sdr1(env, sregs.u.s.sdr1);
1208 }
1209
1210 /* Sync SLB */
1211 #ifdef TARGET_PPC64
1212 /*
1213 * The packed SLB array we get from KVM_GET_SREGS only contains
1214 * information about valid entries. So we flush our internal copy
1215 * to get rid of stale ones, then put all valid SLB entries back
1216 * in.
1217 */
1218 memset(env->slb, 0, sizeof(env->slb));
1219 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1220 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1221 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1222 /*
1223 * Only restore valid entries
1224 */
1225 if (rb & SLB_ESID_V) {
1226 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1227 }
1228 }
1229 #endif
1230
1231 /* Sync SRs */
1232 for (i = 0; i < 16; i++) {
1233 env->sr[i] = sregs.u.s.ppc32.sr[i];
1234 }
1235
1236 /* Sync BATs */
1237 for (i = 0; i < 8; i++) {
1238 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1239 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1240 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1241 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1242 }
1243
1244 return 0;
1245 }
1246
1247 int kvm_arch_get_registers(CPUState *cs)
1248 {
1249 PowerPCCPU *cpu = POWERPC_CPU(cs);
1250 CPUPPCState *env = &cpu->env;
1251 struct kvm_regs regs;
1252 uint32_t cr;
1253 int i, ret;
1254
1255 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1256 if (ret < 0)
1257 return ret;
1258
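    /* Unpack KVM's 32-bit CR value back into the eight 4-bit crf fields,
     * starting with CR7 in the least-significant nibble. */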
1259 cr = regs.cr;
1260 for (i = 7; i >= 0; i--) {
1261 env->crf[i] = cr & 15;
1262 cr >>= 4;
1263 }
1264
1265 env->ctr = regs.ctr;
1266 env->lr = regs.lr;
1267 cpu_write_xer(env, regs.xer);
1268 env->msr = regs.msr;
1269 env->nip = regs.pc;
1270
1271 env->spr[SPR_SRR0] = regs.srr0;
1272 env->spr[SPR_SRR1] = regs.srr1;
1273
1274 env->spr[SPR_SPRG0] = regs.sprg0;
1275 env->spr[SPR_SPRG1] = regs.sprg1;
1276 env->spr[SPR_SPRG2] = regs.sprg2;
1277 env->spr[SPR_SPRG3] = regs.sprg3;
1278 env->spr[SPR_SPRG4] = regs.sprg4;
1279 env->spr[SPR_SPRG5] = regs.sprg5;
1280 env->spr[SPR_SPRG6] = regs.sprg6;
1281 env->spr[SPR_SPRG7] = regs.sprg7;
1282
1283 env->spr[SPR_BOOKE_PID] = regs.pid;
1284
1285 for (i = 0;i < 32; i++)
1286 env->gpr[i] = regs.gpr[i];
1287
1288 kvm_get_fp(cs);
1289
1290 if (cap_booke_sregs) {
1291 ret = kvmppc_get_booke_sregs(cpu);
1292 if (ret < 0) {
1293 return ret;
1294 }
1295 }
1296
1297 if (cap_segstate) {
1298 ret = kvmppc_get_books_sregs(cpu);
1299 if (ret < 0) {
1300 return ret;
1301 }
1302 }
1303
1304 if (cap_hior) {
1305 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1306 }
1307
1308 if (cap_one_reg) {
1309 int i;
1310
1311         /* We deliberately ignore errors here: for kernels which have
1312          * the ONE_REG calls but don't support the specific
1313 * registers, there's a reasonable chance things will still
1314 * work, at least until we try to migrate. */
1315 for (i = 0; i < 1024; i++) {
1316 uint64_t id = env->spr_cb[i].one_reg_id;
1317
1318 if (id != 0) {
1319 kvm_get_one_spr(cs, id, i);
1320 }
1321 }
1322
1323 #ifdef TARGET_PPC64
1324 if (msr_ts) {
1325 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1326 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1327 }
1328 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1329 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1330 }
1331 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1332 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1333 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1334 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1335 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1336 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1337 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1338 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1339 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1340 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1341 }
1342
1343 if (cap_papr) {
1344 if (kvm_get_vpa(cs) < 0) {
1345 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1346 }
1347 }
1348
1349 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1350 #endif
1351 }
1352
1353 return 0;
1354 }
1355
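/* Raise or lower the external interrupt pin in KVM. Other interrupt
 * sources are left to the generic interrupt handling, and the ioctl is
 * skipped entirely when the level-IRQ capabilities are missing. */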
1356 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1357 {
1358 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1359
1360 if (irq != PPC_INTERRUPT_EXT) {
1361 return 0;
1362 }
1363
1364 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1365 return 0;
1366 }
1367
1368 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1369
1370 return 0;
1371 }
1372
1373 #if defined(TARGET_PPCEMB)
1374 #define PPC_INPUT_INT PPC40x_INPUT_INT
1375 #elif defined(TARGET_PPC64)
1376 #define PPC_INPUT_INT PPC970_INPUT_INT
1377 #else
1378 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1379 #endif
1380
1381 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1382 {
1383 PowerPCCPU *cpu = POWERPC_CPU(cs);
1384 CPUPPCState *env = &cpu->env;
1385 int r;
1386 unsigned irq;
1387
1388 qemu_mutex_lock_iothread();
1389
1390 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1391 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1392 if (!cap_interrupt_level &&
1393 run->ready_for_interrupt_injection &&
1394 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1395 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1396 {
1397 /* For now KVM disregards the 'irq' argument. However, in the
1398 * future KVM could cache it in-kernel to avoid a heavyweight exit
1399 * when reading the UIC.
1400 */
1401 irq = KVM_INTERRUPT_SET;
1402
1403 DPRINTF("injected interrupt %d\n", irq);
1404 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1405 if (r < 0) {
1406 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1407 }
1408
1409 /* Always wake up soon in case the interrupt was level based */
1410 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1411 (NANOSECONDS_PER_SECOND / 50));
1412 }
1413
1414 /* We don't know if there are more interrupts pending after this. However,
1415 * the guest will return to userspace in the course of handling this one
1416      * anyway, so we will get a chance to deliver the rest. */
1417
1418 qemu_mutex_unlock_iothread();
1419 }
1420
1421 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1422 {
1423 return MEMTXATTRS_UNSPECIFIED;
1424 }
1425
1426 int kvm_arch_process_async_events(CPUState *cs)
1427 {
1428 return cs->halted;
1429 }
1430
1431 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1432 {
1433 CPUState *cs = CPU(cpu);
1434 CPUPPCState *env = &cpu->env;
1435
1436 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1437 cs->halted = 1;
1438 cs->exception_index = EXCP_HLT;
1439 }
1440
1441 return 0;
1442 }
1443
1444 /* map dcr access to existing qemu dcr emulation */
1445 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1446 {
1447 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1448 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1449
1450 return 0;
1451 }
1452
1453 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1454 {
1455 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1456 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1457
1458 return 0;
1459 }
1460
1461 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1462 {
1463 /* Mixed endian case is not handled */
1464 uint32_t sc = debug_inst_opcode;
1465
1466 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1467 sizeof(sc), 0) ||
1468 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1469 return -EINVAL;
1470 }
1471
1472 return 0;
1473 }
1474
1475 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1476 {
1477 uint32_t sc;
1478
1479 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1480 sc != debug_inst_opcode ||
1481 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1482 sizeof(sc), 1)) {
1483 return -EINVAL;
1484 }
1485
1486 return 0;
1487 }
1488
1489 static int find_hw_breakpoint(target_ulong addr, int type)
1490 {
1491 int n;
1492
1493 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1494 <= ARRAY_SIZE(hw_debug_points));
1495
1496 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1497 if (hw_debug_points[n].addr == addr &&
1498 hw_debug_points[n].type == type) {
1499 return n;
1500 }
1501 }
1502
1503 return -1;
1504 }
1505
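/* Look up a hardware watchpoint by address, trying the access, write and
 * read watchpoint types in turn, and report the matching BP_MEM_* flag
 * back to the caller. */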
1506 static int find_hw_watchpoint(target_ulong addr, int *flag)
1507 {
1508 int n;
1509
1510 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1511 if (n >= 0) {
1512 *flag = BP_MEM_ACCESS;
1513 return n;
1514 }
1515
1516 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1517 if (n >= 0) {
1518 *flag = BP_MEM_WRITE;
1519 return n;
1520 }
1521
1522 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1523 if (n >= 0) {
1524 *flag = BP_MEM_READ;
1525 return n;
1526 }
1527
1528 return -1;
1529 }
1530
1531 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1532 target_ulong len, int type)
1533 {
1534 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1535 return -ENOBUFS;
1536 }
1537
1538 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1539 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1540
1541 switch (type) {
1542 case GDB_BREAKPOINT_HW:
1543 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1544 return -ENOBUFS;
1545 }
1546
1547 if (find_hw_breakpoint(addr, type) >= 0) {
1548 return -EEXIST;
1549 }
1550
1551 nb_hw_breakpoint++;
1552 break;
1553
1554 case GDB_WATCHPOINT_WRITE:
1555 case GDB_WATCHPOINT_READ:
1556 case GDB_WATCHPOINT_ACCESS:
1557 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1558 return -ENOBUFS;
1559 }
1560
1561 if (find_hw_breakpoint(addr, type) >= 0) {
1562 return -EEXIST;
1563 }
1564
1565 nb_hw_watchpoint++;
1566 break;
1567
1568 default:
1569 return -ENOSYS;
1570 }
1571
1572 return 0;
1573 }
1574
1575 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1576 target_ulong len, int type)
1577 {
1578 int n;
1579
1580 n = find_hw_breakpoint(addr, type);
1581 if (n < 0) {
1582 return -ENOENT;
1583 }
1584
1585 switch (type) {
1586 case GDB_BREAKPOINT_HW:
1587 nb_hw_breakpoint--;
1588 break;
1589
1590 case GDB_WATCHPOINT_WRITE:
1591 case GDB_WATCHPOINT_READ:
1592 case GDB_WATCHPOINT_ACCESS:
1593 nb_hw_watchpoint--;
1594 break;
1595
1596 default:
1597 return -ENOSYS;
1598 }
1599 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1600
1601 return 0;
1602 }
1603
1604 void kvm_arch_remove_all_hw_breakpoints(void)
1605 {
1606 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1607 }
1608
1609 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1610 {
1611 int n;
1612
1613 /* Software Breakpoint updates */
1614 if (kvm_sw_breakpoints_active(cs)) {
1615 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1616 }
1617
1618 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1619 <= ARRAY_SIZE(hw_debug_points));
1620 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1621
1622 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1623 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1624 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1625 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1626 switch (hw_debug_points[n].type) {
1627 case GDB_BREAKPOINT_HW:
1628 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1629 break;
1630 case GDB_WATCHPOINT_WRITE:
1631 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1632 break;
1633 case GDB_WATCHPOINT_READ:
1634 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1635 break;
1636 case GDB_WATCHPOINT_ACCESS:
1637 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1638 KVMPPC_DEBUG_WATCH_READ;
1639 break;
1640 default:
1641 cpu_abort(cs, "Unsupported breakpoint type\n");
1642 }
1643 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1644 }
1645 }
1646 }
1647
1648 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1649 {
1650 CPUState *cs = CPU(cpu);
1651 CPUPPCState *env = &cpu->env;
1652 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1653 int handle = 0;
1654 int n;
1655 int flag = 0;
1656
1657 if (cs->singlestep_enabled) {
1658 handle = 1;
1659 } else if (arch_info->status) {
1660 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1661 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1662 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1663 if (n >= 0) {
1664 handle = 1;
1665 }
1666 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1667 KVMPPC_DEBUG_WATCH_WRITE)) {
1668 n = find_hw_watchpoint(arch_info->address, &flag);
1669 if (n >= 0) {
1670 handle = 1;
1671 cs->watchpoint_hit = &hw_watchpoint;
1672 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1673 hw_watchpoint.flags = flag;
1674 }
1675 }
1676 }
1677 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1678 handle = 1;
1679 } else {
1680             /* QEMU is not able to handle this debug exception, so
1681              * inject a program exception into the guest instead.
1682              * Yes, a program exception, NOT a debug exception!
1683              * While QEMU is using the debug resources, debug exceptions
1684              * must always be delivered to QEMU. To achieve this we set
1685              * MSR_DE and also MSRP_DEP so the guest cannot change MSR_DE.
1686              * When emulating the debug resources for the guest we would
1687              * instead let the guest control MSR_DE (enable/disable the
1688              * debug interrupt as needed).
1689              * Supporting both configurations at once is not possible, so
1690              * debug resources cannot be shared between QEMU and the guest
1691              * on the BookE architecture.
1692              * In the current design QEMU gets priority over the guest:
1693              * if QEMU is using the debug resources, the guest cannot.
1694              * Since QEMU uses a privileged instruction for software
1695              * breakpoints, we cannot be here because the guest set up a
1696              * debug exception; the only possibility is that the guest
1697              * executed a privileged/illegal instruction, which is why we
1698              * inject a program interrupt.
1699              */
1700
1701 cpu_synchronize_state(cs);
1702 /* env->nip is PC, so increment this by 4 to use
1703              * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1704 */
1705 env->nip += 4;
1706 cs->exception_index = POWERPC_EXCP_PROGRAM;
1707 env->error_code = POWERPC_EXCP_INVAL;
1708 ppc_cpu_do_interrupt(cs);
1709 }
1710
1711 return handle;
1712 }
1713
1714 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1715 {
1716 PowerPCCPU *cpu = POWERPC_CPU(cs);
1717 CPUPPCState *env = &cpu->env;
1718 int ret;
1719
1720 qemu_mutex_lock_iothread();
1721
1722 switch (run->exit_reason) {
1723 case KVM_EXIT_DCR:
1724 if (run->dcr.is_write) {
1725 DPRINTF("handle dcr write\n");
1726 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1727 } else {
1728 DPRINTF("handle dcr read\n");
1729 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1730 }
1731 break;
1732 case KVM_EXIT_HLT:
1733 DPRINTF("handle halt\n");
1734 ret = kvmppc_handle_halt(cpu);
1735 break;
1736 #if defined(TARGET_PPC64)
1737 case KVM_EXIT_PAPR_HCALL:
1738 DPRINTF("handle PAPR hypercall\n");
1739 run->papr_hcall.ret = spapr_hypercall(cpu,
1740 run->papr_hcall.nr,
1741 run->papr_hcall.args);
1742 ret = 0;
1743 break;
1744 #endif
1745 case KVM_EXIT_EPR:
1746 DPRINTF("handle epr\n");
1747 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1748 ret = 0;
1749 break;
1750 case KVM_EXIT_WATCHDOG:
1751 DPRINTF("handle watchdog expiry\n");
1752 watchdog_perform_action();
1753 ret = 0;
1754 break;
1755
1756 case KVM_EXIT_DEBUG:
1757 DPRINTF("handle debug exception\n");
1758 if (kvm_handle_debug(cpu, run)) {
1759 ret = EXCP_DEBUG;
1760 break;
1761 }
1762 /* re-enter, this exception was guest-internal */
1763 ret = 0;
1764 break;
1765
1766 default:
1767 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1768 ret = -1;
1769 break;
1770 }
1771
1772 qemu_mutex_unlock_iothread();
1773 return ret;
1774 }
1775
1776 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1777 {
1778 CPUState *cs = CPU(cpu);
1779 uint32_t bits = tsr_bits;
1780 struct kvm_one_reg reg = {
1781 .id = KVM_REG_PPC_OR_TSR,
1782 .addr = (uintptr_t) &bits,
1783 };
1784
1785 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1786 }
1787
1788 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1789 {
1790
1791 CPUState *cs = CPU(cpu);
1792 uint32_t bits = tsr_bits;
1793 struct kvm_one_reg reg = {
1794 .id = KVM_REG_PPC_CLEAR_TSR,
1795 .addr = (uintptr_t) &bits,
1796 };
1797
1798 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1799 }
1800
1801 int kvmppc_set_tcr(PowerPCCPU *cpu)
1802 {
1803 CPUState *cs = CPU(cpu);
1804 CPUPPCState *env = &cpu->env;
1805 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1806
1807 struct kvm_one_reg reg = {
1808 .id = KVM_REG_PPC_TCR,
1809 .addr = (uintptr_t) &tcr,
1810 };
1811
1812 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1813 }
1814
1815 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1816 {
1817 CPUState *cs = CPU(cpu);
1818 int ret;
1819
1820 if (!kvm_enabled()) {
1821 return -1;
1822 }
1823
1824 if (!cap_ppc_watchdog) {
1825         printf("warning: KVM does not support watchdog\n");
1826 return -1;
1827 }
1828
1829 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1830 if (ret < 0) {
1831 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1832 __func__, strerror(-ret));
1833 return ret;
1834 }
1835
1836 return ret;
1837 }
1838
1839 static int read_cpuinfo(const char *field, char *value, int len)
1840 {
1841 FILE *f;
1842 int ret = -1;
1843 int field_len = strlen(field);
1844 char line[512];
1845
1846 f = fopen("/proc/cpuinfo", "r");
1847 if (!f) {
1848 return -1;
1849 }
1850
1851 do {
1852 if (!fgets(line, sizeof(line), f)) {
1853 break;
1854 }
1855 if (!strncmp(line, field, field_len)) {
1856 pstrcpy(value, len, line);
1857 ret = 0;
1858 break;
1859 }
1860 } while(*line);
1861
1862 fclose(f);
1863
1864 return ret;
1865 }
1866
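/* Parse the host timebase frequency from the "timebase" line of
 * /proc/cpuinfo; fall back to NANOSECONDS_PER_SECOND (i.e. a 1 GHz
 * timebase) if the line cannot be found or parsed. */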
1867 uint32_t kvmppc_get_tbfreq(void)
1868 {
1869 char line[512];
1870 char *ns;
1871 uint32_t retval = NANOSECONDS_PER_SECOND;
1872
1873 if (read_cpuinfo("timebase", line, sizeof(line))) {
1874 return retval;
1875 }
1876
1877 if (!(ns = strchr(line, ':'))) {
1878 return retval;
1879 }
1880
1881 ns++;
1882
1883 return atoi(ns);
1884 }
1885
1886 bool kvmppc_get_host_serial(char **value)
1887 {
1888 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1889 NULL);
1890 }
1891
1892 bool kvmppc_get_host_model(char **value)
1893 {
1894 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1895 }
1896
1897 /* Try to find a device tree node for a CPU with clock-frequency property */
1898 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1899 {
1900 struct dirent *dirp;
1901 DIR *dp;
1902
1903 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1904 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1905 return -1;
1906 }
1907
1908 buf[0] = '\0';
1909 while ((dirp = readdir(dp)) != NULL) {
1910 FILE *f;
1911 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1912 dirp->d_name);
1913 f = fopen(buf, "r");
1914 if (f) {
1915 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1916 fclose(f);
1917 break;
1918 }
1919 buf[0] = '\0';
1920 }
1921 closedir(dp);
1922 if (buf[0] == '\0') {
1923 printf("Unknown host!\n");
1924 return -1;
1925 }
1926
1927 return 0;
1928 }
1929
1930 static uint64_t kvmppc_read_int_dt(const char *filename)
1931 {
1932 union {
1933 uint32_t v32;
1934 uint64_t v64;
1935 } u;
1936 FILE *f;
1937 int len;
1938
1939 f = fopen(filename, "rb");
1940 if (!f) {
1941 return -1;
1942 }
1943
1944 len = fread(&u, 1, sizeof(u), f);
1945 fclose(f);
1946 switch (len) {
1947 case 4:
1948 /* property is a 32-bit quantity */
1949 return be32_to_cpu(u.v32);
1950 case 8:
1951 return be64_to_cpu(u.v64);
1952 }
1953
1954 return 0;
1955 }
1956
1957 /* Read a CPU node property from the host device tree that's a single
1958 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1959 * (can't find or open the property, or doesn't understand the
1960 * format) */
1961 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1962 {
1963 char buf[PATH_MAX], *tmp;
1964 uint64_t val;
1965
1966 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1967 return -1;
1968 }
1969
1970 tmp = g_strdup_printf("%s/%s", buf, propname);
1971 val = kvmppc_read_int_dt(tmp);
1972 g_free(tmp);
1973
1974 return val;
1975 }
1976
1977 uint64_t kvmppc_get_clockfreq(void)
1978 {
1979 return kvmppc_read_int_cpu_dt("clock-frequency");
1980 }
1981
1982 uint32_t kvmppc_get_vmx(void)
1983 {
1984 return kvmppc_read_int_cpu_dt("ibm,vmx");
1985 }
1986
1987 uint32_t kvmppc_get_dfp(void)
1988 {
1989 return kvmppc_read_int_cpu_dt("ibm,dfp");
1990 }
1991
1992 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1993 {
1994 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1995 CPUState *cs = CPU(cpu);
1996
1997 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1998 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1999 return 0;
2000 }
2001
2002 return 1;
2003 }
2004
2005 int kvmppc_get_hasidle(CPUPPCState *env)
2006 {
2007 struct kvm_ppc_pvinfo pvinfo;
2008
2009 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2010 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2011 return 1;
2012 }
2013
2014 return 0;
2015 }
2016
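/*
 * Fill buf with the instruction sequence the guest should execute to make
 * a hypercall. When KVM provides real PVINFO, that sequence is copied out
 * and 0 is returned; otherwise a fallback sequence that always fails
 * (returning -1 in r3) is written and 1 is returned, so the caller knows
 * only the dummy implementation is available.
 */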
2017 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2018 {
2019 uint32_t *hc = (uint32_t*)buf;
2020 struct kvm_ppc_pvinfo pvinfo;
2021
2022 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2023 memcpy(buf, pvinfo.hcall, buf_len);
2024 return 0;
2025 }
2026
2027 /*
2028 * Fall back to a hypercall sequence that always fails, regardless of
     * guest endianness:
2029 *
2030 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2031 * li r3, -1
2032 * b .+8 (becomes nop in wrong endian)
2033 * bswap32(li r3, -1)
2034 */
2035
2036 hc[0] = cpu_to_be32(0x08000048);
2037 hc[1] = cpu_to_be32(0x3860ffff);
2038 hc[2] = cpu_to_be32(0x48000008);
2039 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2040
2041 return 1;
2042 }
2043
2044 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2045 {
2046 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2047 }
2048
2049 void kvmppc_enable_logical_ci_hcalls(void)
2050 {
2051 /*
2052 * FIXME: it would be nice if we could detect the cases where
2053 * we're using a device which requires the in-kernel
2054 * implementation of these hcalls but the kernel lacks them,
2055 * and produce a warning in that case.
2056 */
2057 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2058 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2059 }
2060
2061 void kvmppc_enable_set_mode_hcall(void)
2062 {
2063 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2064 }
2065
2066 void kvmppc_enable_clear_ref_mod_hcalls(void)
2067 {
2068 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2069 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2070 }
2071
2072 void kvmppc_set_papr(PowerPCCPU *cpu)
2073 {
2074 CPUState *cs = CPU(cpu);
2075 int ret;
2076
2077 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2078 if (ret) {
2079 error_report("This vCPU type or KVM version does not support PAPR");
2080 exit(1);
2081 }
2082
2083 /* Update the capability flag so we sync the right information
2084 * with KVM */
2085 cap_papr = 1;
2086 }
2087
2088 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2089 {
2090 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2091 }
2092
2093 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2094 {
2095 CPUState *cs = CPU(cpu);
2096 int ret;
2097
2098 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2099 if (ret && mpic_proxy) {
2100 error_report("This KVM version does not support EPR");
2101 exit(1);
2102 }
2103 }
2104
2105 int kvmppc_smt_threads(void)
2106 {
2107 return cap_ppc_smt ? cap_ppc_smt : 1;
2108 }
2109
2110 #ifdef TARGET_PPC64
2111 off_t kvmppc_alloc_rma(void **rma)
2112 {
2113 off_t size;
2114 int fd;
2115 struct kvm_allocate_rma ret;
2116
2117 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported;
2118 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2119 * not necessary on this hardware;
2120 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware.
2121 *
2122 * FIXME: We should allow the user to force contiguous RMA
2123 * allocation in the cap_ppc_rma==1 case.
2124 */
2125 if (cap_ppc_rma < 2) {
2126 return 0;
2127 }
2128
2129 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2130 if (fd < 0) {
2131 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2132 strerror(errno));
2133 return -1;
2134 }
2135
2136 size = MIN(ret.rma_size, 256ul << 20);
2137
2138 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2139 if (*rma == MAP_FAILED) {
2140 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2141 return -1;
2142 }
2143
2144 return size;
2145 }
2146
2147 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2148 {
2149 struct kvm_ppc_smmu_info info;
2150 long rampagesize, best_page_shift;
2151 int i;
2152
2153 if (cap_ppc_rma >= 2) {
2154 return current_size;
2155 }
2156
2157 /* Find the largest hardware supported page size that's less than
2158 * or equal to the (logical) backing page size of guest RAM */
2159 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2160 rampagesize = getrampagesize();
2161 best_page_shift = 0;
2162
2163 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2164 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2165
2166 if (!sps->page_shift) {
2167 continue;
2168 }
2169
2170 if ((sps->page_shift > best_page_shift)
2171 && ((1UL << sps->page_shift) <= rampagesize)) {
2172 best_page_shift = sps->page_shift;
2173 }
2174 }
2175
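    /* 2^hash_shift bytes of HPT hold one 128-byte PTEG per 2^7 bytes,
     * i.e. 2^(hash_shift - 7) PTEGs. Capping the RMA at one page per
     * PTEG gives the 1 << (best_page_shift + hash_shift - 7) limit
     * below; e.g. a 16 MB HPT (hash_shift = 24) with 64 kB pages
     * (best_page_shift = 16) allows an RMA of up to 8 GiB. */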
2176 return MIN(current_size,
2177 1ULL << (best_page_shift + hash_shift - 7));
2178 }
2179 #endif
2180
2181 bool kvmppc_spapr_use_multitce(void)
2182 {
2183 return cap_spapr_multitce;
2184 }
2185
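/*
 * Ask KVM to create an in-kernel TCE (guest DMA translation) table for
 * the given LIOBN and map it into our address space. On success the
 * table fd is stored in *pfd and the mmap()ed table is returned; on any
 * failure *pfd is left at -1 and NULL is returned, so callers can fall
 * back to a table managed in userspace.
 */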
2186 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2187 bool need_vfio)
2188 {
2189 struct kvm_create_spapr_tce args = {
2190 .liobn = liobn,
2191 .window_size = window_size,
2192 };
2193 long len;
2194 int fd;
2195 void *table;
2196
2197 /* Must set fd to -1 so we don't try to munmap when called for
2198 * destroying the table, which the upper layers -will- do
2199 */
2200 *pfd = -1;
2201 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2202 return NULL;
2203 }
2204
2205 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2206 if (fd < 0) {
2207 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2208 liobn);
2209 return NULL;
2210 }
2211
2212 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2213 /* FIXME: round this up to page size */
2214
2215 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2216 if (table == MAP_FAILED) {
2217 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2218 liobn);
2219 close(fd);
2220 return NULL;
2221 }
2222
2223 *pfd = fd;
2224 return table;
2225 }
2226
2227 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2228 {
2229 long len;
2230
2231 if (fd < 0) {
2232 return -1;
2233 }
2234
2235 len = nb_table * sizeof(uint64_t);
2236 if ((munmap(table, len) < 0) ||
2237 (close(fd) < 0)) {
2238 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2239 strerror(errno));
2240 /* Leak the table */
2241 }
2242
2243 return 0;
2244 }
2245
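/*
 * (Re)allocate the guest hash page table, asking the kernel for
 * shift_hint (log2 of the size in bytes) if possible. Return convention:
 *   > 0  - an HTAB exists in the kernel (newly allocated, or the fixed
 *          16 MB table of old HV kernels); the value is its log2 size
 *   == 0 - QEMU must allocate and manage the HTAB itself (full
 *          emulation, PR KVM, or kernels lacking a usable allocate ioctl)
 *   < 0  - unexpected error from the KVM_PPC_ALLOCATE_HTAB ioctl
 */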
2246 int kvmppc_reset_htab(int shift_hint)
2247 {
2248 uint32_t shift = shift_hint;
2249
2250 if (!kvm_enabled()) {
2251 /* Full emulation, tell caller to allocate htab itself */
2252 return 0;
2253 }
2254 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2255 int ret;
2256 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2257 if (ret == -ENOTTY) {
2258 /* At least some versions of PR KVM advertise the
2259 * capability, but don't implement the ioctl(). Oops.
2260 * Return 0 so that we allocate the htab in qemu, as is
2261 * correct for PR. */
2262 return 0;
2263 } else if (ret < 0) {
2264 return ret;
2265 }
2266 return shift;
2267 }
2268
2269 /* We have a kernel that predates the htab reset calls. For PR
2270 * KVM, we need to allocate the htab ourselves; an HV KVM of
2271 * this era has already allocated a fixed 16 MB hash table.
2272 * Kernels of this era advertise the GET_PVINFO capability
2273 * only on PR, so we use that as a hack to determine the right
2274 * answer */
2275 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2276 /* PR - tell caller to allocate htab */
2277 return 0;
2278 } else {
2279 /* HV - assume 16MB kernel allocated htab */
2280 return 24;
2281 }
2282 }
2283
2284 static inline uint32_t mfpvr(void)
2285 {
2286 uint32_t pvr;
2287
2288 asm ("mfpvr %0"
2289 : "=r"(pvr));
2290 return pvr;
2291 }
2292
2293 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2294 {
2295 if (on) {
2296 *word |= flags;
2297 } else {
2298 *word &= ~flags;
2299 }
2300 }
2301
2302 static void kvmppc_host_cpu_initfn(Object *obj)
2303 {
2304 assert(kvm_enabled());
2305 }
2306
2307 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2308 {
2309 DeviceClass *dc = DEVICE_CLASS(oc);
2310 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2311 uint32_t vmx = kvmppc_get_vmx();
2312 uint32_t dfp = kvmppc_get_dfp();
2313 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2314 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2315
2316 /* Now fix up the class with information we can query from the host */
2317 pcc->pvr = mfpvr();
2318
2319 if (vmx != -1) {
2320 /* Only override when we know what the host supports */
2321 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2322 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2323 }
2324 if (dfp != -1) {
2325 /* Only override when we know what the host supports */
2326 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2327 }
2328
2329 if (dcache_size != -1) {
2330 pcc->l1_dcache_size = dcache_size;
2331 }
2332
2333 if (icache_size != -1) {
2334 pcc->l1_icache_size = icache_size;
2335 }
2336
2337 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2338 dc->cannot_destroy_with_object_finalize_yet = true;
2339 }
2340
2341 bool kvmppc_has_cap_epr(void)
2342 {
2343 return cap_epr;
2344 }
2345
2346 bool kvmppc_has_cap_htab_fd(void)
2347 {
2348 return cap_htab_fd;
2349 }
2350
2351 bool kvmppc_has_cap_fixup_hcalls(void)
2352 {
2353 return cap_fixup_hcalls;
2354 }
2355
2356 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2357 {
2358 ObjectClass *oc = OBJECT_CLASS(pcc);
2359
2360 while (oc && !object_class_is_abstract(oc)) {
2361 oc = object_class_get_parent(oc);
2362 }
2363 assert(oc);
2364
2365 return POWERPC_CPU_CLASS(oc);
2366 }
2367
2368 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2369 {
2370 uint32_t host_pvr = mfpvr();
2371 PowerPCCPUClass *pvr_pcc;
2372
2373 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2374 if (pvr_pcc == NULL) {
2375 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2376 }
2377
2378 return pvr_pcc;
2379 }
2380
2381 static int kvm_ppc_register_host_cpu_type(void)
2382 {
2383 TypeInfo type_info = {
2384 .name = TYPE_HOST_POWERPC_CPU,
2385 .instance_init = kvmppc_host_cpu_initfn,
2386 .class_init = kvmppc_host_cpu_class_init,
2387 };
2388 PowerPCCPUClass *pvr_pcc;
2389 DeviceClass *dc;
2390
2391 pvr_pcc = kvm_ppc_get_host_cpu_class();
2392 if (pvr_pcc == NULL) {
2393 return -1;
2394 }
2395 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2396 type_register(&type_info);
2397
2398 /* Also register a generic CPU class for this CPU's family */
2399 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2400 dc = DEVICE_CLASS(pvr_pcc);
2401 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2402 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2403 type_register(&type_info);
2404
2405 #if defined(TARGET_PPC64)
2406 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2407 type_info.parent = TYPE_SPAPR_CPU_CORE;
2408 type_info.instance_size = sizeof(sPAPRCPUCore);
2409 type_info.instance_init = NULL;
2410 type_info.class_init = spapr_cpu_core_class_init;
2411 type_info.class_data = (void *) "host";
2412 type_register(&type_info);
2413 g_free((void *)type_info.name);
2414
2415 /* Register generic spapr CPU family class for current host CPU type */
2416 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, dc->desc);
2417 type_info.class_data = (void *) dc->desc;
2418 type_register(&type_info);
2419 g_free((void *)type_info.name);
2420 #endif
2421
2422 return 0;
2423 }
2424
2425 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2426 {
2427 struct kvm_rtas_token_args args = {
2428 .token = token,
2429 };
2430
2431 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2432 return -ENOENT;
2433 }
2434
2435 strncpy(args.name, function, sizeof(args.name));
2436
2437 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2438 }
2439
2440 int kvmppc_get_htab_fd(bool write)
2441 {
2442 struct kvm_get_htab_fd s = {
2443 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2444 .start_index = 0,
2445 };
2446
2447 if (!cap_htab_fd) {
2448 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2449 return -1;
2450 }
2451
2452 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2453 }
2454
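/*
 * Stream hash table entries from the KVM HTAB fd into the migration
 * stream, stopping either when the kernel reports the whole table has
 * been read (read() returns 0) or when max_ns of wall-clock time has
 * elapsed (a negative max_ns means no time limit).
 *
 * Each chunk is written as: be32 index, be16 n_valid, be16 n_invalid,
 * followed by n_valid HPTEs of HASH_PTE_SIZE_64 bytes each, mirroring
 * the kvm_get_htab_header layout the kernel hands us.
 *
 * Returns 1 once the entire table has been transferred, 0 if we ran out
 * of time, or a negative value if reading from the fd failed.
 */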
2455 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2456 {
2457 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2458 uint8_t buf[bufsize];
2459 ssize_t rc;
2460
2461 do {
2462 rc = read(fd, buf, bufsize);
2463 if (rc < 0) {
2464 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2465 strerror(errno));
2466 return rc;
2467 } else if (rc) {
2468 uint8_t *buffer = buf;
2469 ssize_t n = rc;
2470 while (n) {
2471 struct kvm_get_htab_header *head =
2472 (struct kvm_get_htab_header *) buffer;
2473 size_t chunksize = sizeof(*head) +
2474 HASH_PTE_SIZE_64 * head->n_valid;
2475
2476 qemu_put_be32(f, head->index);
2477 qemu_put_be16(f, head->n_valid);
2478 qemu_put_be16(f, head->n_invalid);
2479 qemu_put_buffer(f, (void *)(head + 1),
2480 HASH_PTE_SIZE_64 * head->n_valid);
2481
2482 buffer += chunksize;
2483 n -= chunksize;
2484 }
2485 }
2486 } while ((rc != 0)
2487 && ((max_ns < 0)
2488 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2489
2490 return (rc == 0) ? 1 : 0;
2491 }
2492
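/*
 * Counterpart of kvmppc_save_htab() on the destination side: rebuild one
 * incoming chunk as a kvm_get_htab_header plus its n_valid HPTEs and
 * write it to the (write-mode) HTAB fd so the kernel reinstalls those
 * entries. Returns 0 on success, a negative value on write errors, and
 * -1 on a short write (which should never happen for a single chunk).
 */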
2493 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2494 uint16_t n_valid, uint16_t n_invalid)
2495 {
2496 struct kvm_get_htab_header *buf;
2497 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2498 ssize_t rc;
2499
2500 buf = alloca(chunksize);
2501 buf->index = index;
2502 buf->n_valid = n_valid;
2503 buf->n_invalid = n_invalid;
2504
2505 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2506
2507 rc = write(fd, buf, chunksize);
2508 if (rc < 0) {
2509 fprintf(stderr, "Error writing KVM hash table: %s\n",
2510 strerror(errno));
2511 return rc;
2512 }
2513 if (rc != chunksize) {
2514 /* We should never get a short write on a single chunk */
2515 fprintf(stderr, "Short write, restoring KVM hash table\n");
2516 return -1;
2517 }
2518 return 0;
2519 }
2520
2521 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2522 {
2523 return true;
2524 }
2525
2526 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2527 {
2528 return 1;
2529 }
2530
2531 int kvm_arch_on_sigbus(int code, void *addr)
2532 {
2533 return 1;
2534 }
2535
2536 void kvm_arch_init_irq_routing(KVMState *s)
2537 {
2538 }
2539
2540 struct kvm_get_htab_buf {
2541 struct kvm_get_htab_header header;
2542 /*
2543 * We require one extra entry of slack for the read
2544 */
2545 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2546 };
2547
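/*
 * Read one HPTE group starting at pte_index through the KVM HTAB fd.
 * The returned value is an opaque token that is really a pointer to the
 * hpte[] array inside a freshly allocated kvm_get_htab_buf; it must be
 * released with kvmppc_hash64_free_pteg(), which uses container_of() to
 * recover and free the whole buffer. Returns 0 on failure.
 */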
2548 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2549 {
2550 int htab_fd;
2551 struct kvm_get_htab_fd ghf;
2552 struct kvm_get_htab_buf *hpte_buf;
2553
2554 ghf.flags = 0;
2555 ghf.start_index = pte_index;
2556 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2557 if (htab_fd < 0) {
2558 goto error_out;
2559 }
2560
2561 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2562 /*
2563 * Read the hpte group
2564 */
2565 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2566 goto out_close;
2567 }
2568
2569 close(htab_fd);
2570 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2571
2572 out_close:
2573 g_free(hpte_buf);
2574 close(htab_fd);
2575 error_out:
2576 return 0;
2577 }
2578
2579 void kvmppc_hash64_free_pteg(uint64_t token)
2580 {
2581 struct kvm_get_htab_buf *htab_buf;
2582
2583 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2584 hpte);
2585 g_free(htab_buf);
2586 return;
2587 }
2588
2589 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2590 target_ulong pte0, target_ulong pte1)
2591 {
2592 int htab_fd;
2593 struct kvm_get_htab_fd ghf;
2594 struct kvm_get_htab_buf hpte_buf;
2595
2596 ghf.flags = 0;
2597 ghf.start_index = 0; /* Ignored */
2598 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2599 if (htab_fd < 0) {
2600 goto error_out;
2601 }
2602
2603 hpte_buf.header.n_valid = 1;
2604 hpte_buf.header.n_invalid = 0;
2605 hpte_buf.header.index = pte_index;
2606 hpte_buf.hpte[0] = pte0;
2607 hpte_buf.hpte[1] = pte1;
2608 /*
2609 * Write the hpte entry.
2610 * CAUTION: write() has the warn_unused_result attribute, so we must
2611 * check the return value even though nothing useful can be done on failure.
2612 */
2613 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2614 goto out_close;
2615 }
2616
2617 out_close:
2618 close(htab_fd);
2619 return;
2620
2621 error_out:
2622 return;
2623 }
2624
2625 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2626 uint64_t address, uint32_t data, PCIDevice *dev)
2627 {
2628 return 0;
2629 }
2630
2631 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2632 int vector, PCIDevice *dev)
2633 {
2634 return 0;
2635 }
2636
2637 int kvm_arch_release_virq_post(int virq)
2638 {
2639 return 0;
2640 }
2641
2642 int kvm_arch_msi_data_to_gsi(uint32_t data)
2643 {
2644 return data & 0xffff;
2645 }
2646
2647 int kvmppc_enable_hwrng(void)
2648 {
2649 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2650 return -1;
2651 }
2652
2653 return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2654 }