1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21
22 #include <linux/kvm.h>
23
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #include "elf.h"
51 #include "sysemu/kvm_int.h"
52
53 //#define DEBUG_KVM
54
55 #ifdef DEBUG_KVM
56 #define DPRINTF(fmt, ...) \
57 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
58 #else
59 #define DPRINTF(fmt, ...) \
60 do { } while (0)
61 #endif
62
63 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
64
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
66 KVM_CAP_LAST_INFO
67 };
68
69 static int cap_interrupt_unset = false;
70 static int cap_interrupt_level = false;
71 static int cap_segstate;
72 static int cap_booke_sregs;
73 static int cap_ppc_smt;
74 static int cap_ppc_smt_possible;
75 static int cap_spapr_tce;
76 static int cap_spapr_tce_64;
77 static int cap_spapr_multitce;
78 static int cap_spapr_vfio;
79 static int cap_hior;
80 static int cap_one_reg;
81 static int cap_epr;
82 static int cap_ppc_watchdog;
83 static int cap_papr;
84 static int cap_htab_fd;
85 static int cap_fixup_hcalls;
86 static int cap_htm; /* Hardware transactional memory support */
87 static int cap_mmu_radix;
88 static int cap_mmu_hash_v3;
89 static int cap_resize_hpt;
90 static int cap_ppc_pvr_compat;
91 static int cap_ppc_safe_cache;
92 static int cap_ppc_safe_bounds_check;
93 static int cap_ppc_safe_indirect_branch;
94 static int cap_ppc_nested_kvm_hv;
95
96 static uint32_t debug_inst_opcode;
97
98 /* XXX We have a race condition where we actually have a level triggered
99 * interrupt, but the infrastructure can't expose that yet, so the guest
100 * takes but ignores it, goes to sleep and never gets notified that there's
101 * still an interrupt pending.
102 *
103 * As a quick workaround, let's just wake up again 20 ms after we injected
104 * an interrupt. That way we can ensure that we're always reinjecting
105 * interrupts in case the guest swallowed them.
106 */
107 static QEMUTimer *idle_timer;
108
109 static void kvm_kick_cpu(void *opaque)
110 {
111 PowerPCCPU *cpu = opaque;
112
113 qemu_cpu_kick(CPU(cpu));
114 }
115
116 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
117 * should only be used for fallback tests - generally we should use
118 * explicit capabilities for the features we want, rather than
119 * assuming what is/isn't available depending on the KVM variant. */
120 static bool kvmppc_is_pr(KVMState *ks)
121 {
122 /* Assume KVM-PR if the GET_PVINFO capability is available */
123 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
124 }
125
126 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
127 static void kvmppc_get_cpu_characteristics(KVMState *s);
128
129 int kvm_arch_init(MachineState *ms, KVMState *s)
130 {
131 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
132 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
133 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
134 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
135 cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
136 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
137 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
138 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
139 cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
140 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
141 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
142 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
143 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
144 /* Note: we don't set cap_papr here, because this capability is
145 * only activated later by kvmppc_set_papr() */
146 cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
147 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
148 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
149 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
150 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
151 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
152 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
153 kvmppc_get_cpu_characteristics(s);
154 cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
155 /*
156 * Note: setting it to false because there is no such capability
157 * in KVM at this moment.
158 *
159 * TODO: call kvm_vm_check_extension() with the right capability
160 * after the kernel starts implementing it. */
161 cap_ppc_pvr_compat = false;
162
163 if (!cap_interrupt_level) {
164 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
165 "VM to stall at times!\n");
166 }
167
168 kvm_ppc_register_host_cpu_type(ms);
169
170 return 0;
171 }
172
173 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
174 {
175 return 0;
176 }
177
178 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
179 {
180 CPUPPCState *cenv = &cpu->env;
181 CPUState *cs = CPU(cpu);
182 struct kvm_sregs sregs;
183 int ret;
184
185 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
186 /* What we're really trying to say is "if we're on BookE, we use
187 the native PVR for now". This is the only sane way to check
188 it though, so we may mislead users into thinking they can run
189 BookE guests on BookS. Let's hope nobody dares enough :) */
190 return 0;
191 } else {
192 if (!cap_segstate) {
193 fprintf(stderr, "kvm error: missing PVR setting capability\n");
194 return -ENOSYS;
195 }
196 }
197
198 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
199 if (ret) {
200 return ret;
201 }
202
203 sregs.pvr = cenv->spr[SPR_PVR];
204 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
205 }
206
207 /* Set up a shared TLB array with KVM */
208 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
209 {
210 CPUPPCState *env = &cpu->env;
211 CPUState *cs = CPU(cpu);
212 struct kvm_book3e_206_tlb_params params = {};
213 struct kvm_config_tlb cfg = {};
214 unsigned int entries = 0;
215 int ret, i;
216
217 if (!kvm_enabled() ||
218 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
219 return 0;
220 }
221
222 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
223
224 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
225 params.tlb_sizes[i] = booke206_tlb_size(env, i);
226 params.tlb_ways[i] = booke206_tlb_ways(env, i);
227 entries += params.tlb_sizes[i];
228 }
229
230 assert(entries == env->nb_tlb);
231 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
232
233 env->tlb_dirty = true;
234
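/* Hand QEMU's own TLB array straight to KVM so both sides share the
 * same entries; params carries the per-TLB sizes and ways gathered
 * above. */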
235 cfg.array = (uintptr_t)env->tlb.tlbm;
236 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
237 cfg.params = (uintptr_t)&params;
238 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
239
240 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
241 if (ret < 0) {
242 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
243 __func__, strerror(-ret));
244 return ret;
245 }
246
247 env->kvm_sw_tlb = true;
248 return 0;
249 }
250
251
252 #if defined(TARGET_PPC64)
253 static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
254 {
255 int ret;
256
257 assert(kvm_state != NULL);
258
259 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
260 error_setg(errp, "KVM doesn't expose the MMU features it supports");
261 error_append_hint(errp, "Consider switching to a newer KVM\n");
262 return;
263 }
264
265 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
266 if (ret == 0) {
267 return;
268 }
269
270 error_setg_errno(errp, -ret,
271 "KVM failed to provide the MMU features it supports");
272 }
273
274 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
275 {
276 KVMState *s = KVM_STATE(current_machine->accelerator);
277 struct ppc_radix_page_info *radix_page_info;
278 struct kvm_ppc_rmmu_info rmmu_info;
279 int i;
280
281 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
282 return NULL;
283 }
284 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
285 return NULL;
286 }
287 radix_page_info = g_malloc0(sizeof(*radix_page_info));
288 radix_page_info->count = 0;
289 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
290 if (rmmu_info.ap_encodings[i]) {
291 radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
292 radix_page_info->count++;
293 }
294 }
295 return radix_page_info;
296 }
297
298 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
299 bool radix, bool gtse,
300 uint64_t proc_tbl)
301 {
302 CPUState *cs = CPU(cpu);
303 int ret;
304 uint64_t flags = 0;
305 struct kvm_ppc_mmuv3_cfg cfg = {
306 .process_table = proc_tbl,
307 };
308
309 if (radix) {
310 flags |= KVM_PPC_MMUV3_RADIX;
311 }
312 if (gtse) {
313 flags |= KVM_PPC_MMUV3_GTSE;
314 }
315 cfg.flags = flags;
316 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
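/* Map the ioctl result onto the PAPR hcall return codes that the
 * caller will hand back to the guest. */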
317 switch (ret) {
318 case 0:
319 return H_SUCCESS;
320 case -EINVAL:
321 return H_PARAMETER;
322 case -ENODEV:
323 return H_NOT_AVAILABLE;
324 default:
325 return H_HARDWARE;
326 }
327 }
328
329 bool kvmppc_hpt_needs_host_contiguous_pages(void)
330 {
331 static struct kvm_ppc_smmu_info smmu_info;
332
333 if (!kvm_enabled()) {
334 return false;
335 }
336
337 kvm_get_smmu_info(&smmu_info, &error_fatal);
338 return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
339 }
340
341 void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
342 {
343 struct kvm_ppc_smmu_info smmu_info;
344 int iq, ik, jq, jk;
345 Error *local_err = NULL;
346
347 /* For now, we only have anything to check on hash64 MMUs */
348 if (!cpu->hash64_opts || !kvm_enabled()) {
349 return;
350 }
351
352 kvm_get_smmu_info(&smmu_info, &local_err);
353 if (local_err) {
354 error_propagate(errp, local_err);
355 return;
356 }
357
358 if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
359 && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
360 error_setg(errp,
361 "KVM does not support 1TiB segments which guest expects");
362 return;
363 }
364
365 if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
366 error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
367 smmu_info.slb_size, cpu->hash64_opts->slb_size);
368 return;
369 }
370
371 /*
372 * Verify that every pagesize supported by the cpu model is
373 * supported by KVM with the same encodings
374 */
375 for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
376 PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
377 struct kvm_ppc_one_seg_page_size *ksps;
378
379 for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
380 if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
381 break;
382 }
383 }
384 if (ik >= ARRAY_SIZE(smmu_info.sps)) {
385 error_setg(errp, "KVM doesn't support base page shift %u",
386 qsps->page_shift);
387 return;
388 }
389
390 ksps = &smmu_info.sps[ik];
391 if (ksps->slb_enc != qsps->slb_enc) {
392 error_setg(errp,
393 "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
394 ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
395 return;
396 }
397
398 for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
399 for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
400 if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
401 break;
402 }
403 }
404
405 if (jk >= ARRAY_SIZE(ksps->enc)) {
406 error_setg(errp, "KVM doesn't support page shift %u/%u",
407 qsps->enc[jq].page_shift, qsps->page_shift);
408 return;
409 }
410 if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
411 error_setg(errp,
412 "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
413 ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
414 qsps->page_shift, qsps->enc[jq].pte_enc);
415 return;
416 }
417 }
418 }
419
420 if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
421 /* Mostly, the guest page sizes we can use are determined by the
422 * host pages used to map guest RAM, which is handled in the
423 * platform code. Cache-Inhibited large pages (64k), however, are
424 * used for I/O, so if they're mapped to the host at all it
425 * will be a normal mapping, not a special hugepage one used
426 * for RAM. */
427 if (getpagesize() < 0x10000) {
428 error_setg(errp,
429 "KVM can't supply 64kiB CI pages, which guest expects");
430 }
431 }
432 }
433 #endif /* defined (TARGET_PPC64) */
434
435 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
436 {
437 return POWERPC_CPU(cpu)->vcpu_id;
438 }
439
440 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
441 * book3s supports only 1 watchpoint, so an array size
442 * of 4 is sufficient for now.
443 */
444 #define MAX_HW_BKPTS 4
445
446 static struct HWBreakpoint {
447 target_ulong addr;
448 int type;
449 } hw_debug_points[MAX_HW_BKPTS];
450
451 static CPUWatchpoint hw_watchpoint;
452
453 /* By default no hardware breakpoints or watchpoints are supported */
454 static int max_hw_breakpoint;
455 static int max_hw_watchpoint;
456 static int nb_hw_breakpoint;
457 static int nb_hw_watchpoint;
458
459 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
460 {
461 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
462 max_hw_breakpoint = 2;
463 max_hw_watchpoint = 2;
464 }
465
466 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
467 fprintf(stderr, "Error initializing h/w breakpoints\n");
468 return;
469 }
470 }
471
472 int kvm_arch_init_vcpu(CPUState *cs)
473 {
474 PowerPCCPU *cpu = POWERPC_CPU(cs);
475 CPUPPCState *cenv = &cpu->env;
476 int ret;
477
478 /* Synchronize sregs with kvm */
479 ret = kvm_arch_sync_sregs(cpu);
480 if (ret) {
481 if (ret == -EINVAL) {
482 error_report("Register sync failed... If you're using kvm-hv.ko,"
483 " only \"-cpu host\" is possible");
484 }
485 return ret;
486 }
487
488 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
489
490 switch (cenv->mmu_model) {
491 case POWERPC_MMU_BOOKE206:
492 /* This target supports access to KVM's guest TLB */
493 ret = kvm_booke206_tlb_init(cpu);
494 break;
495 case POWERPC_MMU_2_07:
496 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
497 /* KVM-HV has transactional memory on POWER8 even without the
498 * KVM_CAP_PPC_HTM extension, so enable it here instead as
499 * long as it's available to userspace on the host. */
500 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
501 cap_htm = true;
502 }
503 }
504 break;
505 default:
506 break;
507 }
508
509 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
510 kvmppc_hw_debug_points_init(cenv);
511
512 return ret;
513 }
514
515 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
516 {
517 CPUPPCState *env = &cpu->env;
518 CPUState *cs = CPU(cpu);
519 struct kvm_dirty_tlb dirty_tlb;
520 unsigned char *bitmap;
521 int ret;
522
523 if (!env->kvm_sw_tlb) {
524 return;
525 }
526
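/* Set every bit in the bitmap so KVM treats all TLB entries as dirty
 * and re-reads the whole shared array. */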
527 bitmap = g_malloc((env->nb_tlb + 7) / 8);
528 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
529
530 dirty_tlb.bitmap = (uintptr_t)bitmap;
531 dirty_tlb.num_dirty = env->nb_tlb;
532
533 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
534 if (ret) {
535 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
536 __func__, strerror(-ret));
537 }
538
539 g_free(bitmap);
540 }
541
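/* Fetch a single SPR from KVM via the ONE_REG interface and store it
 * in env->spr[]; only 32-bit and 64-bit register sizes are handled. */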
542 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
543 {
544 PowerPCCPU *cpu = POWERPC_CPU(cs);
545 CPUPPCState *env = &cpu->env;
546 union {
547 uint32_t u32;
548 uint64_t u64;
549 } val;
550 struct kvm_one_reg reg = {
551 .id = id,
552 .addr = (uintptr_t) &val,
553 };
554 int ret;
555
556 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
557 if (ret != 0) {
558 trace_kvm_failed_spr_get(spr, strerror(errno));
559 } else {
560 switch (id & KVM_REG_SIZE_MASK) {
561 case KVM_REG_SIZE_U32:
562 env->spr[spr] = val.u32;
563 break;
564
565 case KVM_REG_SIZE_U64:
566 env->spr[spr] = val.u64;
567 break;
568
569 default:
570 /* Don't handle this size yet */
571 abort();
572 }
573 }
574 }
575
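/* Counterpart of kvm_get_one_spr: push the value from env->spr[] out
 * to KVM through the ONE_REG interface. */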
576 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
577 {
578 PowerPCCPU *cpu = POWERPC_CPU(cs);
579 CPUPPCState *env = &cpu->env;
580 union {
581 uint32_t u32;
582 uint64_t u64;
583 } val;
584 struct kvm_one_reg reg = {
585 .id = id,
586 .addr = (uintptr_t) &val,
587 };
588 int ret;
589
590 switch (id & KVM_REG_SIZE_MASK) {
591 case KVM_REG_SIZE_U32:
592 val.u32 = env->spr[spr];
593 break;
594
595 case KVM_REG_SIZE_U64:
596 val.u64 = env->spr[spr];
597 break;
598
599 default:
600 /* Don't handle this size yet */
601 abort();
602 }
603
604 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
605 if (ret != 0) {
606 trace_kvm_failed_spr_set(spr, strerror(errno));
607 }
608 }
609
610 static int kvm_put_fp(CPUState *cs)
611 {
612 PowerPCCPU *cpu = POWERPC_CPU(cs);
613 CPUPPCState *env = &cpu->env;
614 struct kvm_one_reg reg;
615 int i;
616 int ret;
617
618 if (env->insns_flags & PPC_FLOAT) {
619 uint64_t fpscr = env->fpscr;
620 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
621
622 reg.id = KVM_REG_PPC_FPSCR;
623 reg.addr = (uintptr_t)&fpscr;
624 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
625 if (ret < 0) {
626 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
627 return ret;
628 }
629
630 for (i = 0; i < 32; i++) {
631 uint64_t vsr[2];
632
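/* Assemble the two 64-bit halves of the VSX register image (the FPR
 * plus the corresponding env->vsr[] doubleword) in the order the host
 * byte order requires. */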
633 #ifdef HOST_WORDS_BIGENDIAN
634 vsr[0] = float64_val(env->fpr[i]);
635 vsr[1] = env->vsr[i];
636 #else
637 vsr[0] = env->vsr[i];
638 vsr[1] = float64_val(env->fpr[i]);
639 #endif
640 reg.addr = (uintptr_t) &vsr;
641 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
642
643 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
644 if (ret < 0) {
645 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
646 i, strerror(errno));
647 return ret;
648 }
649 }
650 }
651
652 if (env->insns_flags & PPC_ALTIVEC) {
653 reg.id = KVM_REG_PPC_VSCR;
654 reg.addr = (uintptr_t)&env->vscr;
655 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
656 if (ret < 0) {
657 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
658 return ret;
659 }
660
661 for (i = 0; i < 32; i++) {
662 reg.id = KVM_REG_PPC_VR(i);
663 reg.addr = (uintptr_t)&env->avr[i];
664 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
665 if (ret < 0) {
666 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
667 return ret;
668 }
669 }
670 }
671
672 return 0;
673 }
674
675 static int kvm_get_fp(CPUState *cs)
676 {
677 PowerPCCPU *cpu = POWERPC_CPU(cs);
678 CPUPPCState *env = &cpu->env;
679 struct kvm_one_reg reg;
680 int i;
681 int ret;
682
683 if (env->insns_flags & PPC_FLOAT) {
684 uint64_t fpscr;
685 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
686
687 reg.id = KVM_REG_PPC_FPSCR;
688 reg.addr = (uintptr_t)&fpscr;
689 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
690 if (ret < 0) {
691 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
692 return ret;
693 } else {
694 env->fpscr = fpscr;
695 }
696
697 for (i = 0; i < 32; i++) {
698 uint64_t vsr[2];
699
700 reg.addr = (uintptr_t) &vsr;
701 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
702
703 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
704 if (ret < 0) {
705 DPRINTF("Unable to get %s%d from KVM: %s\n",
706 vsx ? "VSR" : "FPR", i, strerror(errno));
707 return ret;
708 } else {
709 #ifdef HOST_WORDS_BIGENDIAN
710 env->fpr[i] = vsr[0];
711 if (vsx) {
712 env->vsr[i] = vsr[1];
713 }
714 #else
715 env->fpr[i] = vsr[1];
716 if (vsx) {
717 env->vsr[i] = vsr[0];
718 }
719 #endif
720 }
721 }
722 }
723
724 if (env->insns_flags & PPC_ALTIVEC) {
725 reg.id = KVM_REG_PPC_VSCR;
726 reg.addr = (uintptr_t)&env->vscr;
727 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
728 if (ret < 0) {
729 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
730 return ret;
731 }
732
733 for (i = 0; i < 32; i++) {
734 reg.id = KVM_REG_PPC_VR(i);
735 reg.addr = (uintptr_t)&env->avr[i];
736 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
737 if (ret < 0) {
738 DPRINTF("Unable to get VR%d from KVM: %s\n",
739 i, strerror(errno));
740 return ret;
741 }
742 }
743 }
744
745 return 0;
746 }
747
748 #if defined(TARGET_PPC64)
749 static int kvm_get_vpa(CPUState *cs)
750 {
751 PowerPCCPU *cpu = POWERPC_CPU(cs);
752 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
753 struct kvm_one_reg reg;
754 int ret;
755
756 reg.id = KVM_REG_PPC_VPA_ADDR;
757 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
758 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
759 if (ret < 0) {
760 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
761 return ret;
762 }
763
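/* KVM transfers the SLB shadow address and size as a single 128-bit
 * register, so the two fields must be adjacent in memory, which the
 * assert below checks. */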
764 assert((uintptr_t)&spapr_cpu->slb_shadow_size
765 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
766 reg.id = KVM_REG_PPC_VPA_SLB;
767 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
768 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
769 if (ret < 0) {
770 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
771 strerror(errno));
772 return ret;
773 }
774
775 assert((uintptr_t)&spapr_cpu->dtl_size
776 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
777 reg.id = KVM_REG_PPC_VPA_DTL;
778 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
779 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
780 if (ret < 0) {
781 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
782 strerror(errno));
783 return ret;
784 }
785
786 return 0;
787 }
788
789 static int kvm_put_vpa(CPUState *cs)
790 {
791 PowerPCCPU *cpu = POWERPC_CPU(cs);
792 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
793 struct kvm_one_reg reg;
794 int ret;
795
796 /* SLB shadow or DTL can't be registered unless a master VPA is
797 * registered. That means when restoring state, if a VPA *is*
798 * registered, we need to set that up first. If not, we need to
799 * deregister the others before deregistering the master VPA */
800 assert(spapr_cpu->vpa_addr
801 || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
802
803 if (spapr_cpu->vpa_addr) {
804 reg.id = KVM_REG_PPC_VPA_ADDR;
805 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
806 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
807 if (ret < 0) {
808 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
809 return ret;
810 }
811 }
812
813 assert((uintptr_t)&spapr_cpu->slb_shadow_size
814 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
815 reg.id = KVM_REG_PPC_VPA_SLB;
816 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
817 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
818 if (ret < 0) {
819 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
820 return ret;
821 }
822
823 assert((uintptr_t)&spapr_cpu->dtl_size
824 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
825 reg.id = KVM_REG_PPC_VPA_DTL;
826 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
827 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
828 if (ret < 0) {
829 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
830 strerror(errno));
831 return ret;
832 }
833
834 if (!spapr_cpu->vpa_addr) {
835 reg.id = KVM_REG_PPC_VPA_ADDR;
836 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
837 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
838 if (ret < 0) {
839 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
840 return ret;
841 }
842 }
843
844 return 0;
845 }
846 #endif /* TARGET_PPC64 */
847
848 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
849 {
850 CPUPPCState *env = &cpu->env;
851 struct kvm_sregs sregs;
852 int i;
853
854 sregs.pvr = env->spr[SPR_PVR];
855
856 if (cpu->vhyp) {
857 PPCVirtualHypervisorClass *vhc =
858 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
859 sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
860 } else {
861 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
862 }
863
864 /* Sync SLB */
865 #ifdef TARGET_PPC64
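/* For valid entries, the SLB slot index is or'd into the low bits of
 * the ESID word, as the KVM sregs layout expects. */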
866 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
867 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
868 if (env->slb[i].esid & SLB_ESID_V) {
869 sregs.u.s.ppc64.slb[i].slbe |= i;
870 }
871 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
872 }
873 #endif
874
875 /* Sync SRs */
876 for (i = 0; i < 16; i++) {
877 sregs.u.s.ppc32.sr[i] = env->sr[i];
878 }
879
880 /* Sync BATs */
881 for (i = 0; i < 8; i++) {
882 /* Beware. We have to swap upper and lower bits here */
883 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
884 | env->DBAT[1][i];
885 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
886 | env->IBAT[1][i];
887 }
888
889 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
890 }
891
892 int kvm_arch_put_registers(CPUState *cs, int level)
893 {
894 PowerPCCPU *cpu = POWERPC_CPU(cs);
895 CPUPPCState *env = &cpu->env;
896 struct kvm_regs regs;
897 int ret;
898 int i;
899
900 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
901 if (ret < 0) {
902 return ret;
903 }
904
905 regs.ctr = env->ctr;
906 regs.lr = env->lr;
907 regs.xer = cpu_read_xer(env);
908 regs.msr = env->msr;
909 regs.pc = env->nip;
910
911 regs.srr0 = env->spr[SPR_SRR0];
912 regs.srr1 = env->spr[SPR_SRR1];
913
914 regs.sprg0 = env->spr[SPR_SPRG0];
915 regs.sprg1 = env->spr[SPR_SPRG1];
916 regs.sprg2 = env->spr[SPR_SPRG2];
917 regs.sprg3 = env->spr[SPR_SPRG3];
918 regs.sprg4 = env->spr[SPR_SPRG4];
919 regs.sprg5 = env->spr[SPR_SPRG5];
920 regs.sprg6 = env->spr[SPR_SPRG6];
921 regs.sprg7 = env->spr[SPR_SPRG7];
922
923 regs.pid = env->spr[SPR_BOOKE_PID];
924
925 for (i = 0; i < 32; i++)
926 regs.gpr[i] = env->gpr[i];
927
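/* Pack the eight 4-bit condition register fields into the single
 * 32-bit CR image KVM expects, crf[0] in the most significant bits. */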
928 regs.cr = 0;
929 for (i = 0; i < 8; i++) {
930 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
931 }
932
933 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
934 if (ret < 0)
935 return ret;
936
937 kvm_put_fp(cs);
938
939 if (env->tlb_dirty) {
940 kvm_sw_tlb_put(cpu);
941 env->tlb_dirty = false;
942 }
943
944 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
945 ret = kvmppc_put_books_sregs(cpu);
946 if (ret < 0) {
947 return ret;
948 }
949 }
950
951 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
952 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
953 }
954
955 if (cap_one_reg) {
956 int i;
957
958 /* We deliberately ignore errors here: for kernels which have
959 * the ONE_REG calls but don't support the specific
960 * registers, there's a reasonable chance things will still
961 * work, at least until we try to migrate. */
962 for (i = 0; i < 1024; i++) {
963 uint64_t id = env->spr_cb[i].one_reg_id;
964
965 if (id != 0) {
966 kvm_put_one_spr(cs, id, i);
967 }
968 }
969
970 #ifdef TARGET_PPC64
971 if (msr_ts) {
972 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
973 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
974 }
975 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
976 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
977 }
978 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
979 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
980 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
981 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
982 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
983 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
984 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
985 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
986 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
987 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
988 }
989
990 if (cap_papr) {
991 if (kvm_put_vpa(cs) < 0) {
992 DPRINTF("Warning: Unable to set VPA information to KVM\n");
993 }
994 }
995
996 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
997 #endif /* TARGET_PPC64 */
998 }
999
1000 return ret;
1001 }
1002
1003 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1004 {
1005 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1006 }
1007
1008 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1009 {
1010 CPUPPCState *env = &cpu->env;
1011 struct kvm_sregs sregs;
1012 int ret;
1013
1014 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1015 if (ret < 0) {
1016 return ret;
1017 }
1018
1019 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1020 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1021 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1022 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1023 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1024 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1025 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1026 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1027 env->spr[SPR_DECR] = sregs.u.e.dec;
1028 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1029 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1030 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1031 }
1032
1033 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1034 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1035 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1036 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1037 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1038 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1039 }
1040
1041 if (sregs.u.e.features & KVM_SREGS_E_64) {
1042 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1043 }
1044
1045 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1046 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1047 }
1048
1049 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1050 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1051 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1052 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1053 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1054 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1055 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1056 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1057 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1058 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1059 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1060 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1061 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1062 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1063 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1064 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1065 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1066 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1067 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1068 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1069 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1070 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1071 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1072 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1073 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1074 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1075 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1076 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1077 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1078 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1079 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1080 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1081 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1082
1083 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1084 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1085 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1086 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1087 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1088 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1089 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1090 }
1091
1092 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1093 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1094 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1095 }
1096
1097 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1098 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1099 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1100 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1101 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1102 }
1103 }
1104
1105 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1106 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1107 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1108 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1109 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1110 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1111 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1112 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1113 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1114 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1115 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1116 }
1117
1118 if (sregs.u.e.features & KVM_SREGS_EXP) {
1119 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1120 }
1121
1122 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1123 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1124 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1125 }
1126
1127 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1128 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1129 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1130 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1131
1132 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1133 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1134 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1135 }
1136 }
1137
1138 return 0;
1139 }
1140
1141 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1142 {
1143 CPUPPCState *env = &cpu->env;
1144 struct kvm_sregs sregs;
1145 int ret;
1146 int i;
1147
1148 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1149 if (ret < 0) {
1150 return ret;
1151 }
1152
1153 if (!cpu->vhyp) {
1154 ppc_store_sdr1(env, sregs.u.s.sdr1);
1155 }
1156
1157 /* Sync SLB */
1158 #ifdef TARGET_PPC64
1159 /*
1160 * The packed SLB array we get from KVM_GET_SREGS only contains
1161 * information about valid entries. So we flush our internal copy
1162 * to get rid of stale ones, then put all valid SLB entries back
1163 * in.
1164 */
1165 memset(env->slb, 0, sizeof(env->slb));
1166 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1167 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1168 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1169 /*
1170 * Only restore valid entries
1171 */
1172 if (rb & SLB_ESID_V) {
1173 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1174 }
1175 }
1176 #endif
1177
1178 /* Sync SRs */
1179 for (i = 0; i < 16; i++) {
1180 env->sr[i] = sregs.u.s.ppc32.sr[i];
1181 }
1182
1183 /* Sync BATs */
1184 for (i = 0; i < 8; i++) {
1185 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1186 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1187 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1188 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1189 }
1190
1191 return 0;
1192 }
1193
1194 int kvm_arch_get_registers(CPUState *cs)
1195 {
1196 PowerPCCPU *cpu = POWERPC_CPU(cs);
1197 CPUPPCState *env = &cpu->env;
1198 struct kvm_regs regs;
1199 uint32_t cr;
1200 int i, ret;
1201
1202 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1203 if (ret < 0)
1204 return ret;
1205
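/* Unpack the 32-bit CR image back into the eight 4-bit crf[] fields,
 * starting with the least significant nibble (crf[7]). */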
1206 cr = regs.cr;
1207 for (i = 7; i >= 0; i--) {
1208 env->crf[i] = cr & 15;
1209 cr >>= 4;
1210 }
1211
1212 env->ctr = regs.ctr;
1213 env->lr = regs.lr;
1214 cpu_write_xer(env, regs.xer);
1215 env->msr = regs.msr;
1216 env->nip = regs.pc;
1217
1218 env->spr[SPR_SRR0] = regs.srr0;
1219 env->spr[SPR_SRR1] = regs.srr1;
1220
1221 env->spr[SPR_SPRG0] = regs.sprg0;
1222 env->spr[SPR_SPRG1] = regs.sprg1;
1223 env->spr[SPR_SPRG2] = regs.sprg2;
1224 env->spr[SPR_SPRG3] = regs.sprg3;
1225 env->spr[SPR_SPRG4] = regs.sprg4;
1226 env->spr[SPR_SPRG5] = regs.sprg5;
1227 env->spr[SPR_SPRG6] = regs.sprg6;
1228 env->spr[SPR_SPRG7] = regs.sprg7;
1229
1230 env->spr[SPR_BOOKE_PID] = regs.pid;
1231
1232 for (i = 0; i < 32; i++)
1233 env->gpr[i] = regs.gpr[i];
1234
1235 kvm_get_fp(cs);
1236
1237 if (cap_booke_sregs) {
1238 ret = kvmppc_get_booke_sregs(cpu);
1239 if (ret < 0) {
1240 return ret;
1241 }
1242 }
1243
1244 if (cap_segstate) {
1245 ret = kvmppc_get_books_sregs(cpu);
1246 if (ret < 0) {
1247 return ret;
1248 }
1249 }
1250
1251 if (cap_hior) {
1252 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1253 }
1254
1255 if (cap_one_reg) {
1256 int i;
1257
1258 /* We deliberately ignore errors here: for kernels which have
1259 * the ONE_REG calls but don't support the specific
1260 * registers, there's a reasonable chance things will still
1261 * work, at least until we try to migrate. */
1262 for (i = 0; i < 1024; i++) {
1263 uint64_t id = env->spr_cb[i].one_reg_id;
1264
1265 if (id != 0) {
1266 kvm_get_one_spr(cs, id, i);
1267 }
1268 }
1269
1270 #ifdef TARGET_PPC64
1271 if (msr_ts) {
1272 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1273 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1274 }
1275 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1276 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1277 }
1278 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1279 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1280 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1281 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1282 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1283 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1284 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1285 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1286 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1287 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1288 }
1289
1290 if (cap_papr) {
1291 if (kvm_get_vpa(cs) < 0) {
1292 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1293 }
1294 }
1295
1296 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1297 #endif
1298 }
1299
1300 return 0;
1301 }
1302
1303 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1304 {
1305 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1306
1307 if (irq != PPC_INTERRUPT_EXT) {
1308 return 0;
1309 }
1310
1311 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1312 return 0;
1313 }
1314
1315 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1316
1317 return 0;
1318 }
1319
1320 #if defined(TARGET_PPC64)
1321 #define PPC_INPUT_INT PPC970_INPUT_INT
1322 #else
1323 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1324 #endif
1325
1326 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1327 {
1328 PowerPCCPU *cpu = POWERPC_CPU(cs);
1329 CPUPPCState *env = &cpu->env;
1330 int r;
1331 unsigned irq;
1332
1333 qemu_mutex_lock_iothread();
1334
1335 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1336 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1337 if (!cap_interrupt_level &&
1338 run->ready_for_interrupt_injection &&
1339 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1340 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1341 {
1342 /* For now KVM disregards the 'irq' argument. However, in the
1343 * future KVM could cache it in-kernel to avoid a heavyweight exit
1344 * when reading the UIC.
1345 */
1346 irq = KVM_INTERRUPT_SET;
1347
1348 DPRINTF("injected interrupt %d\n", irq);
1349 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1350 if (r < 0) {
1351 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1352 }
1353
1354 /* Always wake up soon in case the interrupt was level based */
1355 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1356 (NANOSECONDS_PER_SECOND / 50));
1357 }
1358
1359 /* We don't know if there are more interrupts pending after this. However,
1360 * the guest will return to userspace in the course of handling this one
1361 * anyways, so we will get a chance to deliver the rest. */
1362
1363 qemu_mutex_unlock_iothread();
1364 }
1365
1366 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1367 {
1368 return MEMTXATTRS_UNSPECIFIED;
1369 }
1370
1371 int kvm_arch_process_async_events(CPUState *cs)
1372 {
1373 return cs->halted;
1374 }
1375
1376 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1377 {
1378 CPUState *cs = CPU(cpu);
1379 CPUPPCState *env = &cpu->env;
1380
1381 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1382 cs->halted = 1;
1383 cs->exception_index = EXCP_HLT;
1384 }
1385
1386 return 0;
1387 }
1388
1389 /* map dcr access to existing qemu dcr emulation */
1390 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1391 {
1392 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1393 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1394
1395 return 0;
1396 }
1397
1398 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1399 {
1400 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1401 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1402
1403 return 0;
1404 }
1405
1406 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1407 {
1408 /* Mixed endian case is not handled */
1409 uint32_t sc = debug_inst_opcode;
1410
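/* Save the original instruction into bp->saved_insn, then patch the
 * KVM-provided breakpoint opcode into guest memory at bp->pc. */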
1411 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1412 sizeof(sc), 0) ||
1413 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1414 return -EINVAL;
1415 }
1416
1417 return 0;
1418 }
1419
1420 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1421 {
1422 uint32_t sc;
1423
1424 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1425 sc != debug_inst_opcode ||
1426 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1427 sizeof(sc), 1)) {
1428 return -EINVAL;
1429 }
1430
1431 return 0;
1432 }
1433
1434 static int find_hw_breakpoint(target_ulong addr, int type)
1435 {
1436 int n;
1437
1438 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1439 <= ARRAY_SIZE(hw_debug_points));
1440
1441 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1442 if (hw_debug_points[n].addr == addr &&
1443 hw_debug_points[n].type == type) {
1444 return n;
1445 }
1446 }
1447
1448 return -1;
1449 }
1450
1451 static int find_hw_watchpoint(target_ulong addr, int *flag)
1452 {
1453 int n;
1454
1455 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1456 if (n >= 0) {
1457 *flag = BP_MEM_ACCESS;
1458 return n;
1459 }
1460
1461 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1462 if (n >= 0) {
1463 *flag = BP_MEM_WRITE;
1464 return n;
1465 }
1466
1467 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1468 if (n >= 0) {
1469 *flag = BP_MEM_READ;
1470 return n;
1471 }
1472
1473 return -1;
1474 }
1475
1476 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1477 target_ulong len, int type)
1478 {
1479 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1480 return -ENOBUFS;
1481 }
1482
1483 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1484 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1485
1486 switch (type) {
1487 case GDB_BREAKPOINT_HW:
1488 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1489 return -ENOBUFS;
1490 }
1491
1492 if (find_hw_breakpoint(addr, type) >= 0) {
1493 return -EEXIST;
1494 }
1495
1496 nb_hw_breakpoint++;
1497 break;
1498
1499 case GDB_WATCHPOINT_WRITE:
1500 case GDB_WATCHPOINT_READ:
1501 case GDB_WATCHPOINT_ACCESS:
1502 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1503 return -ENOBUFS;
1504 }
1505
1506 if (find_hw_breakpoint(addr, type) >= 0) {
1507 return -EEXIST;
1508 }
1509
1510 nb_hw_watchpoint++;
1511 break;
1512
1513 default:
1514 return -ENOSYS;
1515 }
1516
1517 return 0;
1518 }
1519
1520 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1521 target_ulong len, int type)
1522 {
1523 int n;
1524
1525 n = find_hw_breakpoint(addr, type);
1526 if (n < 0) {
1527 return -ENOENT;
1528 }
1529
1530 switch (type) {
1531 case GDB_BREAKPOINT_HW:
1532 nb_hw_breakpoint--;
1533 break;
1534
1535 case GDB_WATCHPOINT_WRITE:
1536 case GDB_WATCHPOINT_READ:
1537 case GDB_WATCHPOINT_ACCESS:
1538 nb_hw_watchpoint--;
1539 break;
1540
1541 default:
1542 return -ENOSYS;
1543 }
1544 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1545
1546 return 0;
1547 }
1548
1549 void kvm_arch_remove_all_hw_breakpoints(void)
1550 {
1551 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1552 }
1553
1554 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1555 {
1556 int n;
1557
1558 /* Software Breakpoint updates */
1559 if (kvm_sw_breakpoints_active(cs)) {
1560 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1561 }
1562
1563 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1564 <= ARRAY_SIZE(hw_debug_points));
1565 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1566
1567 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1568 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1569 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1570 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1571 switch (hw_debug_points[n].type) {
1572 case GDB_BREAKPOINT_HW:
1573 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1574 break;
1575 case GDB_WATCHPOINT_WRITE:
1576 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1577 break;
1578 case GDB_WATCHPOINT_READ:
1579 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1580 break;
1581 case GDB_WATCHPOINT_ACCESS:
1582 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1583 KVMPPC_DEBUG_WATCH_READ;
1584 break;
1585 default:
1586 cpu_abort(cs, "Unsupported breakpoint type\n");
1587 }
1588 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1589 }
1590 }
1591 }
1592
1593 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1594 {
1595 CPUState *cs = CPU(cpu);
1596 CPUPPCState *env = &cpu->env;
1597 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1598 int handle = 0;
1599 int n;
1600 int flag = 0;
1601
1602 if (cs->singlestep_enabled) {
1603 handle = 1;
1604 } else if (arch_info->status) {
1605 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1606 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1607 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1608 if (n >= 0) {
1609 handle = 1;
1610 }
1611 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1612 KVMPPC_DEBUG_WATCH_WRITE)) {
1613 n = find_hw_watchpoint(arch_info->address, &flag);
1614 if (n >= 0) {
1615 handle = 1;
1616 cs->watchpoint_hit = &hw_watchpoint;
1617 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1618 hw_watchpoint.flags = flag;
1619 }
1620 }
1621 }
1622 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1623 handle = 1;
1624 } else {
1625 /* QEMU is not able to handle this debug exception itself, so we
1626 * inject a program exception into the guest;
1627 * Yes, a program exception, NOT a debug exception!
1628 * When QEMU is using the debug resources, debug exceptions must
1629 * always be enabled. To achieve this we set MSR_DE and also set
1630 * MSRP_DEP so the guest cannot change MSR_DE.
1631 * When emulating debug resources for the guest, we instead want
1632 * the guest to control MSR_DE (enabling/disabling debug
1633 * interrupts as needed).
1634 * Supporting both configurations at once is NOT possible, so
1635 * debug resources cannot be shared between QEMU and the guest
1636 * on the BookE architecture.
1637 * In the current design QEMU gets priority over the guest: if
1638 * QEMU is using the debug resources, the guest cannot use them.
1639 * For software breakpoints QEMU uses a privileged instruction,
1640 * so there is no way we can be here because the guest set up a
1641 * debug exception; the only possibility is that the guest
1642 * executed a privileged or illegal instruction, which is why we
1643 * inject a program interrupt.
1644 */
1645
1646 cpu_synchronize_state(cs);
1647 /* env->nip is PC, so increment this by 4 to use
1648 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1649 */
1650 env->nip += 4;
1651 cs->exception_index = POWERPC_EXCP_PROGRAM;
1652 env->error_code = POWERPC_EXCP_INVAL;
1653 ppc_cpu_do_interrupt(cs);
1654 }
1655
1656 return handle;
1657 }
1658
1659 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1660 {
1661 PowerPCCPU *cpu = POWERPC_CPU(cs);
1662 CPUPPCState *env = &cpu->env;
1663 int ret;
1664
1665 qemu_mutex_lock_iothread();
1666
1667 switch (run->exit_reason) {
1668 case KVM_EXIT_DCR:
1669 if (run->dcr.is_write) {
1670 DPRINTF("handle dcr write\n");
1671 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1672 } else {
1673 DPRINTF("handle dcr read\n");
1674 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1675 }
1676 break;
1677 case KVM_EXIT_HLT:
1678 DPRINTF("handle halt\n");
1679 ret = kvmppc_handle_halt(cpu);
1680 break;
1681 #if defined(TARGET_PPC64)
1682 case KVM_EXIT_PAPR_HCALL:
1683 DPRINTF("handle PAPR hypercall\n");
1684 run->papr_hcall.ret = spapr_hypercall(cpu,
1685 run->papr_hcall.nr,
1686 run->papr_hcall.args);
1687 ret = 0;
1688 break;
1689 #endif
1690 case KVM_EXIT_EPR:
1691 DPRINTF("handle epr\n");
1692 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1693 ret = 0;
1694 break;
1695 case KVM_EXIT_WATCHDOG:
1696 DPRINTF("handle watchdog expiry\n");
1697 watchdog_perform_action();
1698 ret = 0;
1699 break;
1700
1701 case KVM_EXIT_DEBUG:
1702 DPRINTF("handle debug exception\n");
1703 if (kvm_handle_debug(cpu, run)) {
1704 ret = EXCP_DEBUG;
1705 break;
1706 }
1707 /* re-enter, this exception was guest-internal */
1708 ret = 0;
1709 break;
1710
1711 default:
1712 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1713 ret = -1;
1714 break;
1715 }
1716
1717 qemu_mutex_unlock_iothread();
1718 return ret;
1719 }
1720
1721 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1722 {
1723 CPUState *cs = CPU(cpu);
1724 uint32_t bits = tsr_bits;
1725 struct kvm_one_reg reg = {
1726 .id = KVM_REG_PPC_OR_TSR,
1727 .addr = (uintptr_t) &bits,
1728 };
1729
1730 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1731 }
1732
1733 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1734 {
1735
1736 CPUState *cs = CPU(cpu);
1737 uint32_t bits = tsr_bits;
1738 struct kvm_one_reg reg = {
1739 .id = KVM_REG_PPC_CLEAR_TSR,
1740 .addr = (uintptr_t) &bits,
1741 };
1742
1743 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1744 }
1745
1746 int kvmppc_set_tcr(PowerPCCPU *cpu)
1747 {
1748 CPUState *cs = CPU(cpu);
1749 CPUPPCState *env = &cpu->env;
1750 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1751
1752 struct kvm_one_reg reg = {
1753 .id = KVM_REG_PPC_TCR,
1754 .addr = (uintptr_t) &tcr,
1755 };
1756
1757 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1758 }
1759
1760 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1761 {
1762 CPUState *cs = CPU(cpu);
1763 int ret;
1764
1765 if (!kvm_enabled()) {
1766 return -1;
1767 }
1768
1769 if (!cap_ppc_watchdog) {
1770 printf("warning: KVM does not support watchdog");
1771 return -1;
1772 }
1773
1774 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1775 if (ret < 0) {
1776 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1777 __func__, strerror(-ret));
1778 return ret;
1779 }
1780
1781 return ret;
1782 }
1783
1784 static int read_cpuinfo(const char *field, char *value, int len)
1785 {
1786 FILE *f;
1787 int ret = -1;
1788 int field_len = strlen(field);
1789 char line[512];
1790
1791 f = fopen("/proc/cpuinfo", "r");
1792 if (!f) {
1793 return -1;
1794 }
1795
1796 do {
1797 if (!fgets(line, sizeof(line), f)) {
1798 break;
1799 }
1800 if (!strncmp(line, field, field_len)) {
1801 pstrcpy(value, len, line);
1802 ret = 0;
1803 break;
1804 }
1805 } while(*line);
1806
1807 fclose(f);
1808
1809 return ret;
1810 }
1811
1812 uint32_t kvmppc_get_tbfreq(void)
1813 {
1814 char line[512];
1815 char *ns;
1816 uint32_t retval = NANOSECONDS_PER_SECOND;
1817
1818 if (read_cpuinfo("timebase", line, sizeof(line))) {
1819 return retval;
1820 }
1821
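/* The timebase line is expected to look like "timebase : 512000000";
 * skip past the colon and parse the frequency that follows. */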
1822 if (!(ns = strchr(line, ':'))) {
1823 return retval;
1824 }
1825
1826 ns++;
1827
1828 return atoi(ns);
1829 }
1830
1831 bool kvmppc_get_host_serial(char **value)
1832 {
1833 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1834 NULL);
1835 }
1836
1837 bool kvmppc_get_host_model(char **value)
1838 {
1839 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1840 }
1841
1842 /* Try to find a device tree node for a CPU with clock-frequency property */
1843 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1844 {
1845 struct dirent *dirp;
1846 DIR *dp;
1847
1848 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1849 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1850 return -1;
1851 }
1852
1853 buf[0] = '\0';
1854 while ((dirp = readdir(dp)) != NULL) {
1855 FILE *f;
1856 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1857 dirp->d_name);
1858 f = fopen(buf, "r");
1859 if (f) {
1860 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1861 fclose(f);
1862 break;
1863 }
1864 buf[0] = '\0';
1865 }
1866 closedir(dp);
1867 if (buf[0] == '\0') {
1868 printf("Unknown host!\n");
1869 return -1;
1870 }
1871
1872 return 0;
1873 }
1874
1875 static uint64_t kvmppc_read_int_dt(const char *filename)
1876 {
1877 union {
1878 uint32_t v32;
1879 uint64_t v64;
1880 } u;
1881 FILE *f;
1882 int len;
1883
1884 f = fopen(filename, "rb");
1885 if (!f) {
1886 return -1;
1887 }
1888
1889 len = fread(&u, 1, sizeof(u), f);
1890 fclose(f);
1891 switch (len) {
1892 case 4:
1893 /* property is a 32-bit quantity */
1894 return be32_to_cpu(u.v32);
1895 case 8:
1896 return be64_to_cpu(u.v64);
1897 }
1898
1899 return 0;
1900 }
1901
1902 /* Read a CPU node property from the host device tree that's a single
1903 * integer (32-bit or 64-bit). Returns -1 if the property can't be
1904 * found or opened, and 0 if it doesn't understand the
1905 * format */
1906 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1907 {
1908 char buf[PATH_MAX], *tmp;
1909 uint64_t val;
1910
1911 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1912 return -1;
1913 }
1914
1915 tmp = g_strdup_printf("%s/%s", buf, propname);
1916 val = kvmppc_read_int_dt(tmp);
1917 g_free(tmp);
1918
1919 return val;
1920 }
1921
1922 uint64_t kvmppc_get_clockfreq(void)
1923 {
1924 return kvmppc_read_int_cpu_dt("clock-frequency");
1925 }
1926
1927 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1928 {
1929 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1930 CPUState *cs = CPU(cpu);
1931
1932 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1933 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1934 return 0;
1935 }
1936
1937 return 1;
1938 }
1939
1940 int kvmppc_get_hasidle(CPUPPCState *env)
1941 {
1942 struct kvm_ppc_pvinfo pvinfo;
1943
1944 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1945 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1946 return 1;
1947 }
1948
1949 return 0;
1950 }
1951
1952 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1953 {
1954 uint32_t *hc = (uint32_t*)buf;
1955 struct kvm_ppc_pvinfo pvinfo;
1956
1957 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1958 memcpy(buf, pvinfo.hcall, buf_len);
1959 return 0;
1960 }
1961
1962 /*
1963 * Fallback to always fail hypercalls regardless of endianness:
1964 *
1965 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1966 * li r3, -1
1967 * b .+8 (becomes nop in wrong endian)
1968 * bswap32(li r3, -1)
1969 */
1970
1971 hc[0] = cpu_to_be32(0x08000048);
1972 hc[1] = cpu_to_be32(0x3860ffff);
1973 hc[2] = cpu_to_be32(0x48000008);
1974 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1975
1976 return 1;
1977 }
1978
1979 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1980 {
1981 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1982 }
1983
1984 void kvmppc_enable_logical_ci_hcalls(void)
1985 {
1986 /*
1987 * FIXME: it would be nice if we could detect the cases where
1988 * we're using a device which requires the in kernel
1989 * implementation of these hcalls, but the kernel lacks them and
1990 * produce a warning.
1991 */
1992 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1993 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1994 }
1995
1996 void kvmppc_enable_set_mode_hcall(void)
1997 {
1998 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
1999 }
2000
2001 void kvmppc_enable_clear_ref_mod_hcalls(void)
2002 {
2003 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2004 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2005 }
2006
2007 void kvmppc_set_papr(PowerPCCPU *cpu)
2008 {
2009 CPUState *cs = CPU(cpu);
2010 int ret;
2011
2012 if (!kvm_enabled()) {
2013 return;
2014 }
2015
2016 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2017 if (ret) {
2018 error_report("This vCPU type or KVM version does not support PAPR");
2019 exit(1);
2020 }
2021
2022 /* Update the capability flag so we sync the right information
2023      * with KVM */
2024 cap_papr = 1;
2025 }
2026
2027 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2028 {
2029 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2030 }
2031
2032 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2033 {
2034 CPUState *cs = CPU(cpu);
2035 int ret;
2036
2037 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2038 if (ret && mpic_proxy) {
2039 error_report("This KVM version does not support EPR");
2040 exit(1);
2041 }
2042 }
2043
2044 int kvmppc_smt_threads(void)
2045 {
2046 return cap_ppc_smt ? cap_ppc_smt : 1;
2047 }
2048
2049 int kvmppc_set_smt_threads(int smt)
2050 {
2051 int ret;
2052
2053 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2054 if (!ret) {
2055 cap_ppc_smt = smt;
2056 }
2057 return ret;
2058 }
2059
2060 void kvmppc_hint_smt_possible(Error **errp)
2061 {
2062 int i;
2063 GString *g;
2064 char *s;
2065
2066 assert(kvm_enabled());
2067 if (cap_ppc_smt_possible) {
2068 g = g_string_new("Available VSMT modes:");
2069 for (i = 63; i >= 0; i--) {
2070 if ((1UL << i) & cap_ppc_smt_possible) {
2071 g_string_append_printf(g, " %lu", (1UL << i));
2072 }
2073 }
2074 s = g_string_free(g, false);
2075 error_append_hint(errp, "%s.\n", s);
2076 g_free(s);
2077 } else {
2078 error_append_hint(errp,
2079 "This KVM seems to be too old to support VSMT.\n");
2080 }
2081 }
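/*
 * For example, a cap_ppc_smt_possible mask with bits 0-3 set (0x0f)
 * would produce the hint "Available VSMT modes: 8 4 2 1.".
 */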
2082
2083
2084 #ifdef TARGET_PPC64
2085 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2086 {
2087 struct kvm_ppc_smmu_info info;
2088 long rampagesize, best_page_shift;
2089 int i;
2090
2091     /* Find the largest hardware-supported page size that's less than
2092 * or equal to the (logical) backing page size of guest RAM */
2093 kvm_get_smmu_info(&info, &error_fatal);
2094 rampagesize = qemu_getrampagesize();
2095 best_page_shift = 0;
2096
2097 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2098 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2099
2100 if (!sps->page_shift) {
2101 continue;
2102 }
2103
2104 if ((sps->page_shift > best_page_shift)
2105 && ((1UL << sps->page_shift) <= rampagesize)) {
2106 best_page_shift = sps->page_shift;
2107 }
2108 }
2109
2110 return MIN(current_size,
2111 1ULL << (best_page_shift + hash_shift - 7));
2112 }
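/*
 * Worked example for the limit above, with made-up numbers: a 16 MiB
 * HPT gives hash_shift = 24 and 64 KiB backing pages give
 * best_page_shift = 16, so the cap is 1ULL << (16 + 24 - 7) = 8 GiB
 * and the function returns MIN(current_size, 8 GiB).  The "- 7"
 * presumably corresponds to the 2^7 = 128 byte size of a PTE group.
 */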
2113 #endif
2114
2115 bool kvmppc_spapr_use_multitce(void)
2116 {
2117 return cap_spapr_multitce;
2118 }
2119
2120 int kvmppc_spapr_enable_inkernel_multitce(void)
2121 {
2122 int ret;
2123
2124 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2125 H_PUT_TCE_INDIRECT, 1);
2126 if (!ret) {
2127 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2128 H_STUFF_TCE, 1);
2129 }
2130
2131 return ret;
2132 }
2133
2134 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2135 uint64_t bus_offset, uint32_t nb_table,
2136 int *pfd, bool need_vfio)
2137 {
2138 long len;
2139 int fd;
2140 void *table;
2141
2142 /* Must set fd to -1 so we don't try to munmap when called for
2143 * destroying the table, which the upper layers -will- do
2144 */
2145 *pfd = -1;
2146 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2147 return NULL;
2148 }
2149
2150 if (cap_spapr_tce_64) {
2151 struct kvm_create_spapr_tce_64 args = {
2152 .liobn = liobn,
2153 .page_shift = page_shift,
2154 .offset = bus_offset >> page_shift,
2155 .size = nb_table,
2156 .flags = 0
2157 };
2158 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2159 if (fd < 0) {
2160 fprintf(stderr,
2161 "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2162 liobn);
2163 return NULL;
2164 }
2165 } else if (cap_spapr_tce) {
2166 uint64_t window_size = (uint64_t) nb_table << page_shift;
2167 struct kvm_create_spapr_tce args = {
2168 .liobn = liobn,
2169 .window_size = window_size,
2170 };
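        /*
         * The legacy ioctl's window_size field is only 32 bits wide and
         * it cannot express a bus offset, so bail out if either piece
         * of information would be lost.
         */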
2171 if ((window_size != args.window_size) || bus_offset) {
2172 return NULL;
2173 }
2174 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2175 if (fd < 0) {
2176 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2177 liobn);
2178 return NULL;
2179 }
2180 } else {
2181 return NULL;
2182 }
2183
2184 len = nb_table * sizeof(uint64_t);
2185 /* FIXME: round this up to page size */
2186
2187 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2188 if (table == MAP_FAILED) {
2189 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2190 liobn);
2191 close(fd);
2192 return NULL;
2193 }
2194
2195 *pfd = fd;
2196 return table;
2197 }
2198
2199 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2200 {
2201 long len;
2202
2203 if (fd < 0) {
2204 return -1;
2205 }
2206
2207 len = nb_table * sizeof(uint64_t);
2208 if ((munmap(table, len) < 0) ||
2209 (close(fd) < 0)) {
2210         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
2211 strerror(errno));
2212 /* Leak the table */
2213 }
2214
2215 return 0;
2216 }
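/*
 * A minimal usage sketch for the pair of functions above (illustrative
 * only; the variable names are assumptions, not taken from the callers):
 *
 *     int fd;
 *     uint64_t *tbl = kvmppc_create_spapr_tce(liobn, page_shift, bus_offset,
 *                                             nb_table, &fd, need_vfio);
 *     if (!tbl) {
 *         ... fall back to a table allocated and managed in userspace ...
 *     }
 *     ...
 *     kvmppc_remove_spapr_tce(tbl, fd, nb_table);
 */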
2217
2218 int kvmppc_reset_htab(int shift_hint)
2219 {
2220 uint32_t shift = shift_hint;
2221
2222 if (!kvm_enabled()) {
2223 /* Full emulation, tell caller to allocate htab itself */
2224 return 0;
2225 }
2226 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2227 int ret;
2228 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2229 if (ret == -ENOTTY) {
2230 /* At least some versions of PR KVM advertise the
2231 * capability, but don't implement the ioctl(). Oops.
2232              * Return 0 so that we allocate the htab in QEMU, as is
2233 * correct for PR. */
2234 return 0;
2235 } else if (ret < 0) {
2236 return ret;
2237 }
2238 return shift;
2239 }
2240
2241 /* We have a kernel that predates the htab reset calls. For PR
2242      * KVM, we need to allocate the htab ourselves; an HV KVM of this
2243      * era will have allocated a fixed 16MB hash table already. */
2244 if (kvmppc_is_pr(kvm_state)) {
2245 /* PR - tell caller to allocate htab */
2246 return 0;
2247 } else {
2248 /* HV - assume 16MB kernel allocated htab */
2249 return 24;
2250 }
2251 }
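/*
 * To summarise the return convention above: a negative value is an
 * error, 0 tells the caller to allocate the hash table itself (TCG,
 * PR KVM, or a PR kernel with a non-functional ioctl), and a positive
 * value is the shift (log2 of the size in bytes) of the hash table
 * the kernel has already allocated.
 */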
2252
2253 static inline uint32_t mfpvr(void)
2254 {
2255 uint32_t pvr;
2256
2257 asm ("mfpvr %0"
2258 : "=r"(pvr));
2259 return pvr;
2260 }
2261
2262 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2263 {
2264 if (on) {
2265 *word |= flags;
2266 } else {
2267 *word &= ~flags;
2268 }
2269 }
2270
2271 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2272 {
2273 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2274 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2275 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2276
2277 /* Now fix up the class with information we can query from the host */
2278 pcc->pvr = mfpvr();
2279
2280 alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2281 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2282 alter_insns(&pcc->insns_flags2, PPC2_VSX,
2283 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2284 alter_insns(&pcc->insns_flags2, PPC2_DFP,
2285 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2286
2287 if (dcache_size != -1) {
2288 pcc->l1_dcache_size = dcache_size;
2289 }
2290
2291 if (icache_size != -1) {
2292 pcc->l1_icache_size = icache_size;
2293 }
2294
2295 #if defined(TARGET_PPC64)
2296 pcc->radix_page_info = kvm_get_radix_page_info();
2297
2298 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2299 /*
2300 * POWER9 DD1 has some bugs which make it not really ISA 3.00
2301 * compliant. More importantly, advertising ISA 3.00
2302 * architected mode may prevent guests from activating
2303 * necessary DD1 workarounds.
2304 */
2305 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2306 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2307 }
2308 #endif /* defined(TARGET_PPC64) */
2309 }
2310
2311 bool kvmppc_has_cap_epr(void)
2312 {
2313 return cap_epr;
2314 }
2315
2316 bool kvmppc_has_cap_fixup_hcalls(void)
2317 {
2318 return cap_fixup_hcalls;
2319 }
2320
2321 bool kvmppc_has_cap_htm(void)
2322 {
2323 return cap_htm;
2324 }
2325
2326 bool kvmppc_has_cap_mmu_radix(void)
2327 {
2328 return cap_mmu_radix;
2329 }
2330
2331 bool kvmppc_has_cap_mmu_hash_v3(void)
2332 {
2333 return cap_mmu_hash_v3;
2334 }
2335
2336 static bool kvmppc_power8_host(void)
2337 {
2338 bool ret = false;
2339 #ifdef TARGET_PPC64
2340 {
2341 uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
2342 ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
2343 (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
2344 (base_pvr == CPU_POWERPC_POWER8_BASE);
2345 }
2346 #endif /* TARGET_PPC64 */
2347 return ret;
2348 }
2349
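/*
 * The parse_cap_ppc_safe_*() helpers below condense the raw character
 * and behaviour bitmasks reported by KVM_PPC_GET_CPU_CHAR into the
 * coarse levels the sPAPR capability code consumes (roughly: 0 =
 * assume vulnerable, 1 = a software workaround is available, higher
 * values = fixed in hardware).  Only bits that are also set in the
 * corresponding *_mask field are meaningful.
 */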
2350 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2351 {
2352 bool l1d_thread_priv_req = !kvmppc_power8_host();
2353
2354 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2355 return 2;
2356 } else if ((!l1d_thread_priv_req ||
2357 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2358 (c.character & c.character_mask
2359 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2360 return 1;
2361 }
2362
2363 return 0;
2364 }
2365
2366 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2367 {
2368 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2369 return 2;
2370 } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2371 return 1;
2372 }
2373
2374 return 0;
2375 }
2376
2377 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2378 {
2379 if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2380 return SPAPR_CAP_FIXED_CCD;
2381 } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2382 return SPAPR_CAP_FIXED_IBS;
2383 }
2384
2385 return 0;
2386 }
2387
2388 static void kvmppc_get_cpu_characteristics(KVMState *s)
2389 {
2390 struct kvm_ppc_cpu_char c;
2391 int ret;
2392
2393 /* Assume broken */
2394 cap_ppc_safe_cache = 0;
2395 cap_ppc_safe_bounds_check = 0;
2396 cap_ppc_safe_indirect_branch = 0;
2397
2398 ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2399 if (!ret) {
2400 return;
2401 }
2402 ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2403 if (ret < 0) {
2404 return;
2405 }
2406
2407 cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2408 cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2409 cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2410 }
2411
2412 int kvmppc_get_cap_safe_cache(void)
2413 {
2414 return cap_ppc_safe_cache;
2415 }
2416
2417 int kvmppc_get_cap_safe_bounds_check(void)
2418 {
2419 return cap_ppc_safe_bounds_check;
2420 }
2421
2422 int kvmppc_get_cap_safe_indirect_branch(void)
2423 {
2424 return cap_ppc_safe_indirect_branch;
2425 }
2426
2427 bool kvmppc_has_cap_nested_kvm_hv(void)
2428 {
2429 return !!cap_ppc_nested_kvm_hv;
2430 }
2431
2432 int kvmppc_set_cap_nested_kvm_hv(int enable)
2433 {
2434 return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_NESTED_HV, 0, enable);
2435 }
2436
2437 bool kvmppc_has_cap_spapr_vfio(void)
2438 {
2439 return cap_spapr_vfio;
2440 }
2441
2442 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2443 {
2444 uint32_t host_pvr = mfpvr();
2445 PowerPCCPUClass *pvr_pcc;
2446
2447 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2448 if (pvr_pcc == NULL) {
2449 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2450 }
2451
2452 return pvr_pcc;
2453 }
2454
2455 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2456 {
2457 TypeInfo type_info = {
2458 .name = TYPE_HOST_POWERPC_CPU,
2459 .class_init = kvmppc_host_cpu_class_init,
2460 };
2461 MachineClass *mc = MACHINE_GET_CLASS(ms);
2462 PowerPCCPUClass *pvr_pcc;
2463 ObjectClass *oc;
2464 DeviceClass *dc;
2465 int i;
2466
2467 pvr_pcc = kvm_ppc_get_host_cpu_class();
2468 if (pvr_pcc == NULL) {
2469 return -1;
2470 }
2471 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2472 type_register(&type_info);
2473 if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2474 /* override TCG default cpu type with 'host' cpu model */
2475 mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2476 }
2477
2478 oc = object_class_by_name(type_info.name);
2479 g_assert(oc);
2480
2481 /*
2482 * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2483 * we want "POWER8" to be a "family" alias that points to the current
2484 * host CPU type, too)
2485 */
2486 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2487 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2488 if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2489 char *suffix;
2490
2491 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2492 suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2493 if (suffix) {
2494 *suffix = 0;
2495 }
2496 break;
2497 }
2498 }
2499
2500 return 0;
2501 }
2502
2503 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2504 {
2505 struct kvm_rtas_token_args args = {
2506 .token = token,
2507 };
2508
2509 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2510 return -ENOENT;
2511 }
2512
2513 strncpy(args.name, function, sizeof(args.name));
2514
2515 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2516 }
2517
2518 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2519 {
2520 struct kvm_get_htab_fd s = {
2521 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2522 .start_index = index,
2523 };
2524 int ret;
2525
2526 if (!cap_htab_fd) {
2527 error_setg(errp, "KVM version doesn't support %s the HPT",
2528 write ? "writing" : "reading");
2529 return -ENOTSUP;
2530 }
2531
2532 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2533 if (ret < 0) {
2534 error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2535 write ? "writing" : "reading", write ? "to" : "from",
2536 strerror(errno));
2537 return -errno;
2538 }
2539
2540 return ret;
2541 }
2542
2543 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2544 {
2545 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2546 uint8_t buf[bufsize];
2547 ssize_t rc;
2548
2549 do {
2550 rc = read(fd, buf, bufsize);
2551 if (rc < 0) {
2552 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2553 strerror(errno));
2554 return rc;
2555 } else if (rc) {
2556 uint8_t *buffer = buf;
2557 ssize_t n = rc;
2558 while (n) {
2559 struct kvm_get_htab_header *head =
2560 (struct kvm_get_htab_header *) buffer;
2561 size_t chunksize = sizeof(*head) +
2562 HASH_PTE_SIZE_64 * head->n_valid;
2563
2564 qemu_put_be32(f, head->index);
2565 qemu_put_be16(f, head->n_valid);
2566 qemu_put_be16(f, head->n_invalid);
2567 qemu_put_buffer(f, (void *)(head + 1),
2568 HASH_PTE_SIZE_64 * head->n_valid);
2569
2570 buffer += chunksize;
2571 n -= chunksize;
2572 }
2573 }
2574 } while ((rc != 0)
2575 && ((max_ns < 0)
2576 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2577
2578 return (rc == 0) ? 1 : 0;
2579 }
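/*
 * For reference, each chunk that kvmppc_save_htab() writes into the
 * migration stream above (and that kvmppc_load_htab_chunk() below
 * reads back) is laid out as:
 *
 *     be32  index       index of the first HPTE covered by the chunk
 *     be16  n_valid     number of valid HPTEs whose contents follow
 *     be16  n_invalid   number of HPTEs to invalidate (no data follows)
 *     n_valid * HASH_PTE_SIZE_64 bytes of raw HPTE contents
 */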
2580
2581 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2582 uint16_t n_valid, uint16_t n_invalid)
2583 {
2584 struct kvm_get_htab_header *buf;
2585 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2586 ssize_t rc;
2587
2588 buf = alloca(chunksize);
2589 buf->index = index;
2590 buf->n_valid = n_valid;
2591 buf->n_invalid = n_invalid;
2592
2593 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2594
2595 rc = write(fd, buf, chunksize);
2596 if (rc < 0) {
2597 fprintf(stderr, "Error writing KVM hash table: %s\n",
2598 strerror(errno));
2599 return rc;
2600 }
2601 if (rc != chunksize) {
2602 /* We should never get a short write on a single chunk */
2603 fprintf(stderr, "Short write, restoring KVM hash table\n");
2604 return -1;
2605 }
2606 return 0;
2607 }
2608
2609 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2610 {
2611 return true;
2612 }
2613
2614 void kvm_arch_init_irq_routing(KVMState *s)
2615 {
2616 }
2617
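/*
 * Read n HPTEs starting at index ptex from the KVM hash table fd into
 * hptes[]; entries the kernel reports as invalid are filled with zeroes.
 */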
2618 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2619 {
2620 int fd, rc;
2621 int i;
2622
2623 fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2624
2625 i = 0;
2626 while (i < n) {
2627 struct kvm_get_htab_header *hdr;
2628 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2629 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2630
2631 rc = read(fd, buf, sizeof(buf));
2632 if (rc < 0) {
2633 hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2634 }
2635
2636 hdr = (struct kvm_get_htab_header *)buf;
2637 while ((i < n) && ((char *)hdr < (buf + rc))) {
2638 int invalid = hdr->n_invalid, valid = hdr->n_valid;
2639
2640 if (hdr->index != (ptex + i)) {
2641 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2642                          " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2643 }
2644
2645 if (n - i < valid) {
2646 valid = n - i;
2647 }
2648 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2649 i += valid;
2650
2651 if ((n - i) < invalid) {
2652 invalid = n - i;
2653 }
2654 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2655 i += invalid;
2656
2657 hdr = (struct kvm_get_htab_header *)
2658 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2659 }
2660 }
2661
2662 close(fd);
2663 }
2664
2665 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2666 {
2667 int fd, rc;
2668 struct {
2669 struct kvm_get_htab_header hdr;
2670 uint64_t pte0;
2671 uint64_t pte1;
2672 } buf;
2673
2674 fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2675
2676 buf.hdr.n_valid = 1;
2677 buf.hdr.n_invalid = 0;
2678 buf.hdr.index = ptex;
2679 buf.pte0 = cpu_to_be64(pte0);
2680 buf.pte1 = cpu_to_be64(pte1);
2681
2682 rc = write(fd, &buf, sizeof(buf));
2683 if (rc != sizeof(buf)) {
2684 hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2685 }
2686 close(fd);
2687 }
2688
2689 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2690 uint64_t address, uint32_t data, PCIDevice *dev)
2691 {
2692 return 0;
2693 }
2694
2695 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2696 int vector, PCIDevice *dev)
2697 {
2698 return 0;
2699 }
2700
2701 int kvm_arch_release_virq_post(int virq)
2702 {
2703 return 0;
2704 }
2705
2706 int kvm_arch_msi_data_to_gsi(uint32_t data)
2707 {
2708 return data & 0xffff;
2709 }
2710
2711 int kvmppc_enable_hwrng(void)
2712 {
2713 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2714 return -1;
2715 }
2716
2717 return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2718 }
2719
2720 void kvmppc_check_papr_resize_hpt(Error **errp)
2721 {
2722 if (!kvm_enabled()) {
2723 return; /* No KVM, we're good */
2724 }
2725
2726 if (cap_resize_hpt) {
2727 return; /* Kernel has explicit support, we're good */
2728 }
2729
2730     /* Otherwise fall back to checking for PR KVM */
2731 if (kvmppc_is_pr(kvm_state)) {
2732 return;
2733 }
2734
2735 error_setg(errp,
2736 "Hash page table resizing not available with this KVM version");
2737 }
2738
2739 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2740 {
2741 CPUState *cs = CPU(cpu);
2742 struct kvm_ppc_resize_hpt rhpt = {
2743 .flags = flags,
2744 .shift = shift,
2745 };
2746
2747 if (!cap_resize_hpt) {
2748 return -ENOSYS;
2749 }
2750
2751 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2752 }
2753
2754 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2755 {
2756 CPUState *cs = CPU(cpu);
2757 struct kvm_ppc_resize_hpt rhpt = {
2758 .flags = flags,
2759 .shift = shift,
2760 };
2761
2762 if (!cap_resize_hpt) {
2763 return -ENOSYS;
2764 }
2765
2766 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2767 }
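/*
 * The two wrappers above back the guest-visible H_RESIZE_HPT_PREPARE
 * and H_RESIZE_HPT_COMMIT hypercalls implemented by the sPAPR code:
 * prepare asks the kernel to start building a replacement hash table
 * of 2^shift bytes (and may need to be repeated while the kernel
 * reports the work as still in progress), while commit switches the
 * guest over to the new table.  Both simply fail with -ENOSYS when the
 * kernel does not advertise HPT resizing.
 */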
2768
2769 /*
2770  * This is a helper function to detect a post-migration scenario
2771 * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2772 * the guest kernel can't handle a PVR value other than the actual host
2773 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2774 *
2775 * If we don't have cap_ppc_pvr_compat and we're not running in PR
2776 * (so, we're HV), return true. The workaround itself is done in
2777 * cpu_post_load.
2778 *
2779 * The order here is important: we'll only check for KVM PR as a
2780 * fallback if the guest kernel can't handle the situation itself.
2781 * We need to avoid as much as possible querying the running KVM type
2782  * We need to avoid querying the running KVM type at the QEMU level
2783  * as much as possible.
2784 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2785 {
2786 CPUState *cs = CPU(cpu);
2787
2788 if (!kvm_enabled()) {
2789 return false;
2790 }
2791
2792 if (cap_ppc_pvr_compat) {
2793 return false;
2794 }
2795
2796 return !kvmppc_is_pr(cs->kvm_state);
2797 }
2798
2799 void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online)
2800 {
2801 CPUState *cs = CPU(cpu);
2802
2803 if (kvm_enabled()) {
2804 kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online);
2805 }
2806 }