target/ppc/kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21
22 #include <linux/kvm.h>
23
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "exec/ram_addr.h"
46 #include "sysemu/hostmem.h"
47 #include "qemu/cutils.h"
48 #include "qemu/mmap-alloc.h"
49 #include "elf.h"
50 #include "sysemu/kvm_int.h"
51
52 //#define DEBUG_KVM
53
54 #ifdef DEBUG_KVM
55 #define DPRINTF(fmt, ...) \
56 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
57 #else
58 #define DPRINTF(fmt, ...) \
59 do { } while (0)
60 #endif
61
62 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
63
64 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
65 KVM_CAP_LAST_INFO
66 };
67
68 static int cap_interrupt_unset = false;
69 static int cap_interrupt_level = false;
70 static int cap_segstate;
71 static int cap_booke_sregs;
72 static int cap_ppc_smt;
73 static int cap_ppc_smt_possible;
74 static int cap_spapr_tce;
75 static int cap_spapr_tce_64;
76 static int cap_spapr_multitce;
77 static int cap_spapr_vfio;
78 static int cap_hior;
79 static int cap_one_reg;
80 static int cap_epr;
81 static int cap_ppc_watchdog;
82 static int cap_papr;
83 static int cap_htab_fd;
84 static int cap_fixup_hcalls;
85 static int cap_htm; /* Hardware transactional memory support */
86 static int cap_mmu_radix;
87 static int cap_mmu_hash_v3;
88 static int cap_resize_hpt;
89 static int cap_ppc_pvr_compat;
90 static int cap_ppc_safe_cache;
91 static int cap_ppc_safe_bounds_check;
92 static int cap_ppc_safe_indirect_branch;
93 static int cap_ppc_count_cache_flush_assist;
94 static int cap_ppc_nested_kvm_hv;
95 static int cap_large_decr;
96
97 static uint32_t debug_inst_opcode;
98
99 /* XXX We have a race condition where we actually have a level triggered
100 * interrupt, but the infrastructure can't expose that yet, so the guest
101 * takes but ignores it, goes to sleep and never gets notified that there's
102 * still an interrupt pending.
103 *
104 * As a quick workaround, let's just wake up again 20 ms after we injected
105  * an interrupt. That way we can ensure that we're always reinjecting
106 * interrupts in case the guest swallowed them.
107 */
108 static QEMUTimer *idle_timer;
109
110 static void kvm_kick_cpu(void *opaque)
111 {
112 PowerPCCPU *cpu = opaque;
113
114 qemu_cpu_kick(CPU(cpu));
115 }
116
117 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
118 * should only be used for fallback tests - generally we should use
119 * explicit capabilities for the features we want, rather than
120 * assuming what is/isn't available depending on the KVM variant. */
121 static bool kvmppc_is_pr(KVMState *ks)
122 {
123 /* Assume KVM-PR if the GET_PVINFO capability is available */
124 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
125 }
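/*
 * Example of the intended fallback use (a sketch based on code later in
 * this file): kvm_arch_init_vcpu() only consults kvmppc_is_pr() when the
 * explicit KVM_CAP_PPC_HTM capability is absent, roughly:
 *
 *     if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
 *         // assume KVM-HV and probe the host's HWCAP2 bits instead
 *     }
 */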
126
127 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
128 static void kvmppc_get_cpu_characteristics(KVMState *s);
129 static int kvmppc_get_dec_bits(void);
130
131 int kvm_arch_init(MachineState *ms, KVMState *s)
132 {
133 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
134 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
135 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
136 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
137 cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
138 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
139 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
140 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
141 cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
142 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
143 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
144 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
145 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
146 /* Note: we don't set cap_papr here, because this capability is
147 * only activated after this by kvmppc_set_papr() */
148 cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
149 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
150 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
151 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
152 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
153 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
154 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
155 kvmppc_get_cpu_characteristics(s);
156 cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
157 cap_large_decr = kvmppc_get_dec_bits();
158 /*
159      * Note: setting it to false because there is no such capability
160      * in KVM at this moment.
161      *
162      * TODO: call kvm_vm_check_extension() with the right capability
163      * after the kernel starts implementing it. */
164 cap_ppc_pvr_compat = false;
165
166 if (!cap_interrupt_level) {
167 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
168 "VM to stall at times!\n");
169 }
170
171 kvm_ppc_register_host_cpu_type(ms);
172
173 return 0;
174 }
175
176 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
177 {
178 return 0;
179 }
180
181 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
182 {
183 CPUPPCState *cenv = &cpu->env;
184 CPUState *cs = CPU(cpu);
185 struct kvm_sregs sregs;
186 int ret;
187
188 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
189 /* What we're really trying to say is "if we're on BookE, we use
190 the native PVR for now". This is the only sane way to check
191            it though, so we may mislead users into thinking they can run
192 BookE guests on BookS. Let's hope nobody dares enough :) */
193 return 0;
194 } else {
195 if (!cap_segstate) {
196 fprintf(stderr, "kvm error: missing PVR setting capability\n");
197 return -ENOSYS;
198 }
199 }
200
201 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
202 if (ret) {
203 return ret;
204 }
205
206 sregs.pvr = cenv->spr[SPR_PVR];
207 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
208 }
209
210 /* Set up a shared TLB array with KVM */
211 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
212 {
213 CPUPPCState *env = &cpu->env;
214 CPUState *cs = CPU(cpu);
215 struct kvm_book3e_206_tlb_params params = {};
216 struct kvm_config_tlb cfg = {};
217 unsigned int entries = 0;
218 int ret, i;
219
220 if (!kvm_enabled() ||
221 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
222 return 0;
223 }
224
225 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
226
227 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
228 params.tlb_sizes[i] = booke206_tlb_size(env, i);
229 params.tlb_ways[i] = booke206_tlb_ways(env, i);
230 entries += params.tlb_sizes[i];
231 }
232
233 assert(entries == env->nb_tlb);
234 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
235
236 env->tlb_dirty = true;
237
238 cfg.array = (uintptr_t)env->tlb.tlbm;
239 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
240 cfg.params = (uintptr_t)&params;
241 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
242
243 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
244 if (ret < 0) {
245 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
246 __func__, strerror(-ret));
247 return ret;
248 }
249
250 env->kvm_sw_tlb = true;
251 return 0;
252 }
253
254
255 #if defined(TARGET_PPC64)
256 static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
257 {
258 int ret;
259
260 assert(kvm_state != NULL);
261
262 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
263 error_setg(errp, "KVM doesn't expose the MMU features it supports");
264 error_append_hint(errp, "Consider switching to a newer KVM\n");
265 return;
266 }
267
268 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
269 if (ret == 0) {
270 return;
271 }
272
273 error_setg_errno(errp, -ret,
274 "KVM failed to provide the MMU features it supports");
275 }
276
277 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
278 {
279 KVMState *s = KVM_STATE(current_machine->accelerator);
280 struct ppc_radix_page_info *radix_page_info;
281 struct kvm_ppc_rmmu_info rmmu_info;
282 int i;
283
284 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
285 return NULL;
286 }
287 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
288 return NULL;
289 }
290 radix_page_info = g_malloc0(sizeof(*radix_page_info));
291 radix_page_info->count = 0;
292 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
293 if (rmmu_info.ap_encodings[i]) {
294 radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
295 radix_page_info->count++;
296 }
297 }
298 return radix_page_info;
299 }
300
301 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
302 bool radix, bool gtse,
303 uint64_t proc_tbl)
304 {
305 CPUState *cs = CPU(cpu);
306 int ret;
307 uint64_t flags = 0;
308 struct kvm_ppc_mmuv3_cfg cfg = {
309 .process_table = proc_tbl,
310 };
311
312 if (radix) {
313 flags |= KVM_PPC_MMUV3_RADIX;
314 }
315 if (gtse) {
316 flags |= KVM_PPC_MMUV3_GTSE;
317 }
318 cfg.flags = flags;
319 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
320 switch (ret) {
321 case 0:
322 return H_SUCCESS;
323 case -EINVAL:
324 return H_PARAMETER;
325 case -ENODEV:
326 return H_NOT_AVAILABLE;
327 default:
328 return H_HARDWARE;
329 }
330 }
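/*
 * Usage sketch (an illustration, not a call made in this file): a caller
 * that wants to switch the guest to a radix MMU with guest translation
 * shootdown enabled would do something like
 *
 *     target_ulong rc = kvmppc_configure_v3_mmu(cpu, true, true, proc_tbl);
 *
 * and propagate the returned hcall status (H_SUCCESS, H_PARAMETER,
 * H_NOT_AVAILABLE or H_HARDWARE) back to the guest.
 */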
331
332 bool kvmppc_hpt_needs_host_contiguous_pages(void)
333 {
334 static struct kvm_ppc_smmu_info smmu_info;
335
336 if (!kvm_enabled()) {
337 return false;
338 }
339
340 kvm_get_smmu_info(&smmu_info, &error_fatal);
341 return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
342 }
343
344 void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
345 {
346 struct kvm_ppc_smmu_info smmu_info;
347 int iq, ik, jq, jk;
348 Error *local_err = NULL;
349
350 /* For now, we only have anything to check on hash64 MMUs */
351 if (!cpu->hash64_opts || !kvm_enabled()) {
352 return;
353 }
354
355 kvm_get_smmu_info(&smmu_info, &local_err);
356 if (local_err) {
357 error_propagate(errp, local_err);
358 return;
359 }
360
361 if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
362 && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
363 error_setg(errp,
364 "KVM does not support 1TiB segments which guest expects");
365 return;
366 }
367
368 if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
369 error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
370 smmu_info.slb_size, cpu->hash64_opts->slb_size);
371 return;
372 }
373
374 /*
375 * Verify that every pagesize supported by the cpu model is
376 * supported by KVM with the same encodings
377 */
378 for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
379 PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
380 struct kvm_ppc_one_seg_page_size *ksps;
381
382 for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
383 if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
384 break;
385 }
386 }
387 if (ik >= ARRAY_SIZE(smmu_info.sps)) {
388             error_setg(errp, "KVM doesn't support base page shift %u",
389 qsps->page_shift);
390 return;
391 }
392
393 ksps = &smmu_info.sps[ik];
394 if (ksps->slb_enc != qsps->slb_enc) {
395 error_setg(errp,
396 "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
397 ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
398 return;
399 }
400
401 for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
402 for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
403 if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
404 break;
405 }
406 }
407
408 if (jk >= ARRAY_SIZE(ksps->enc)) {
409 error_setg(errp, "KVM doesn't support page shift %u/%u",
410 qsps->enc[jq].page_shift, qsps->page_shift);
411 return;
412 }
413 if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
414 error_setg(errp,
415 "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
416 ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
417 qsps->page_shift, qsps->enc[jq].pte_enc);
418 return;
419 }
420 }
421 }
422
423 if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
424         /* Mostly, the guest page sizes we can use are determined by the
425 * host pages used to map guest RAM, which is handled in the
426 * platform code. Cache-Inhibited largepages (64k) however are
427 * used for I/O, so if they're mapped to the host at all it
428 * will be a normal mapping, not a special hugepage one used
429 * for RAM. */
430 if (getpagesize() < 0x10000) {
431 error_setg(errp,
432 "KVM can't supply 64kiB CI pages, which guest expects");
433 }
434 }
435 }
436 #endif /* defined (TARGET_PPC64) */
437
438 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
439 {
440 return POWERPC_CPU(cpu)->vcpu_id;
441 }
442
443 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
444  * book3s supports only 1 watchpoint, so an array size
445 * of 4 is sufficient for now.
446 */
447 #define MAX_HW_BKPTS 4
448
449 static struct HWBreakpoint {
450 target_ulong addr;
451 int type;
452 } hw_debug_points[MAX_HW_BKPTS];
453
454 static CPUWatchpoint hw_watchpoint;
455
456 /* By default, no breakpoints or watchpoints are supported */
457 static int max_hw_breakpoint;
458 static int max_hw_watchpoint;
459 static int nb_hw_breakpoint;
460 static int nb_hw_watchpoint;
461
462 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
463 {
464 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
465 max_hw_breakpoint = 2;
466 max_hw_watchpoint = 2;
467 }
468
469 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
470 fprintf(stderr, "Error initializing h/w breakpoints\n");
471 return;
472 }
473 }
474
475 int kvm_arch_init_vcpu(CPUState *cs)
476 {
477 PowerPCCPU *cpu = POWERPC_CPU(cs);
478 CPUPPCState *cenv = &cpu->env;
479 int ret;
480
481 /* Synchronize sregs with kvm */
482 ret = kvm_arch_sync_sregs(cpu);
483 if (ret) {
484 if (ret == -EINVAL) {
485 error_report("Register sync failed... If you're using kvm-hv.ko,"
486 " only \"-cpu host\" is possible");
487 }
488 return ret;
489 }
490
491 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
492
493 switch (cenv->mmu_model) {
494 case POWERPC_MMU_BOOKE206:
495 /* This target supports access to KVM's guest TLB */
496 ret = kvm_booke206_tlb_init(cpu);
497 break;
498 case POWERPC_MMU_2_07:
499 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
500             /* KVM-HV has transactional memory on POWER8 even without the
501              * KVM_CAP_PPC_HTM extension, so enable it here instead as
502              * long as it's available to userspace on the host. */
503 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
504 cap_htm = true;
505 }
506 }
507 break;
508 default:
509 break;
510 }
511
512 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
513 kvmppc_hw_debug_points_init(cenv);
514
515 return ret;
516 }
517
518 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
519 {
520 CPUPPCState *env = &cpu->env;
521 CPUState *cs = CPU(cpu);
522 struct kvm_dirty_tlb dirty_tlb;
523 unsigned char *bitmap;
524 int ret;
525
526 if (!env->kvm_sw_tlb) {
527 return;
528 }
529
530 bitmap = g_malloc((env->nb_tlb + 7) / 8);
531 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
532
533 dirty_tlb.bitmap = (uintptr_t)bitmap;
534 dirty_tlb.num_dirty = env->nb_tlb;
535
536 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
537 if (ret) {
538 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
539 __func__, strerror(-ret));
540 }
541
542 g_free(bitmap);
543 }
544
545 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
546 {
547 PowerPCCPU *cpu = POWERPC_CPU(cs);
548 CPUPPCState *env = &cpu->env;
549 union {
550 uint32_t u32;
551 uint64_t u64;
552 } val;
553 struct kvm_one_reg reg = {
554 .id = id,
555 .addr = (uintptr_t) &val,
556 };
557 int ret;
558
559 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
560 if (ret != 0) {
561 trace_kvm_failed_spr_get(spr, strerror(errno));
562 } else {
563 switch (id & KVM_REG_SIZE_MASK) {
564 case KVM_REG_SIZE_U32:
565 env->spr[spr] = val.u32;
566 break;
567
568 case KVM_REG_SIZE_U64:
569 env->spr[spr] = val.u64;
570 break;
571
572 default:
573 /* Don't handle this size yet */
574 abort();
575 }
576 }
577 }
578
579 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
580 {
581 PowerPCCPU *cpu = POWERPC_CPU(cs);
582 CPUPPCState *env = &cpu->env;
583 union {
584 uint32_t u32;
585 uint64_t u64;
586 } val;
587 struct kvm_one_reg reg = {
588 .id = id,
589 .addr = (uintptr_t) &val,
590 };
591 int ret;
592
593 switch (id & KVM_REG_SIZE_MASK) {
594 case KVM_REG_SIZE_U32:
595 val.u32 = env->spr[spr];
596 break;
597
598 case KVM_REG_SIZE_U64:
599 val.u64 = env->spr[spr];
600 break;
601
602 default:
603 /* Don't handle this size yet */
604 abort();
605 }
606
607 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
608 if (ret != 0) {
609 trace_kvm_failed_spr_set(spr, strerror(errno));
610 }
611 }
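/*
 * Example (taken from the callers below): these SPR helpers pair a
 * KVM_REG_PPC_* ONE_REG id with the corresponding SPR index, e.g.
 *
 *     kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 *
 * The register width is derived from the id via KVM_REG_SIZE_MASK, so no
 * separate size argument is needed.
 */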
612
613 static int kvm_put_fp(CPUState *cs)
614 {
615 PowerPCCPU *cpu = POWERPC_CPU(cs);
616 CPUPPCState *env = &cpu->env;
617 struct kvm_one_reg reg;
618 int i;
619 int ret;
620
621 if (env->insns_flags & PPC_FLOAT) {
622 uint64_t fpscr = env->fpscr;
623 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
624
625 reg.id = KVM_REG_PPC_FPSCR;
626 reg.addr = (uintptr_t)&fpscr;
627 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
628 if (ret < 0) {
629 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
630 return ret;
631 }
632
633 for (i = 0; i < 32; i++) {
634 uint64_t vsr[2];
635 uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
636 uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
637
638 #ifdef HOST_WORDS_BIGENDIAN
639 vsr[0] = float64_val(*fpr);
640 vsr[1] = *vsrl;
641 #else
642 vsr[0] = *vsrl;
643 vsr[1] = float64_val(*fpr);
644 #endif
645 reg.addr = (uintptr_t) &vsr;
646 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
647
648 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
649 if (ret < 0) {
650 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
651 i, strerror(errno));
652 return ret;
653 }
654 }
655 }
656
657 if (env->insns_flags & PPC_ALTIVEC) {
658 reg.id = KVM_REG_PPC_VSCR;
659 reg.addr = (uintptr_t)&env->vscr;
660 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
661 if (ret < 0) {
662 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
663 return ret;
664 }
665
666 for (i = 0; i < 32; i++) {
667 reg.id = KVM_REG_PPC_VR(i);
668 reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
669 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
670 if (ret < 0) {
671 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
672 return ret;
673 }
674 }
675 }
676
677 return 0;
678 }
679
680 static int kvm_get_fp(CPUState *cs)
681 {
682 PowerPCCPU *cpu = POWERPC_CPU(cs);
683 CPUPPCState *env = &cpu->env;
684 struct kvm_one_reg reg;
685 int i;
686 int ret;
687
688 if (env->insns_flags & PPC_FLOAT) {
689 uint64_t fpscr;
690 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
691
692 reg.id = KVM_REG_PPC_FPSCR;
693 reg.addr = (uintptr_t)&fpscr;
694 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
695 if (ret < 0) {
696 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
697 return ret;
698 } else {
699 env->fpscr = fpscr;
700 }
701
702 for (i = 0; i < 32; i++) {
703 uint64_t vsr[2];
704 uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
705 uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
706
707 reg.addr = (uintptr_t) &vsr;
708 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
709
710 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
711 if (ret < 0) {
712 DPRINTF("Unable to get %s%d from KVM: %s\n",
713 vsx ? "VSR" : "FPR", i, strerror(errno));
714 return ret;
715 } else {
716 #ifdef HOST_WORDS_BIGENDIAN
717 *fpr = vsr[0];
718 if (vsx) {
719 *vsrl = vsr[1];
720 }
721 #else
722 *fpr = vsr[1];
723 if (vsx) {
724 *vsrl = vsr[0];
725 }
726 #endif
727 }
728 }
729 }
730
731 if (env->insns_flags & PPC_ALTIVEC) {
732 reg.id = KVM_REG_PPC_VSCR;
733 reg.addr = (uintptr_t)&env->vscr;
734 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
735 if (ret < 0) {
736 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
737 return ret;
738 }
739
740 for (i = 0; i < 32; i++) {
741 reg.id = KVM_REG_PPC_VR(i);
742 reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
743 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
744 if (ret < 0) {
745 DPRINTF("Unable to get VR%d from KVM: %s\n",
746 i, strerror(errno));
747 return ret;
748 }
749 }
750 }
751
752 return 0;
753 }
754
755 #if defined(TARGET_PPC64)
756 static int kvm_get_vpa(CPUState *cs)
757 {
758 PowerPCCPU *cpu = POWERPC_CPU(cs);
759 SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
760 struct kvm_one_reg reg;
761 int ret;
762
763 reg.id = KVM_REG_PPC_VPA_ADDR;
764 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
765 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
766 if (ret < 0) {
767 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
768 return ret;
769 }
770
771 assert((uintptr_t)&spapr_cpu->slb_shadow_size
772 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
773 reg.id = KVM_REG_PPC_VPA_SLB;
774 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
775 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
776 if (ret < 0) {
777 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
778 strerror(errno));
779 return ret;
780 }
781
782 assert((uintptr_t)&spapr_cpu->dtl_size
783 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
784 reg.id = KVM_REG_PPC_VPA_DTL;
785 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
786 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
787 if (ret < 0) {
788 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
789 strerror(errno));
790 return ret;
791 }
792
793 return 0;
794 }
795
796 static int kvm_put_vpa(CPUState *cs)
797 {
798 PowerPCCPU *cpu = POWERPC_CPU(cs);
799 SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
800 struct kvm_one_reg reg;
801 int ret;
802
803 /* SLB shadow or DTL can't be registered unless a master VPA is
804 * registered. That means when restoring state, if a VPA *is*
805 * registered, we need to set that up first. If not, we need to
806 * deregister the others before deregistering the master VPA */
807 assert(spapr_cpu->vpa_addr
808 || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
809
810 if (spapr_cpu->vpa_addr) {
811 reg.id = KVM_REG_PPC_VPA_ADDR;
812 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
813 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
814 if (ret < 0) {
815 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
816 return ret;
817 }
818 }
819
820 assert((uintptr_t)&spapr_cpu->slb_shadow_size
821 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
822 reg.id = KVM_REG_PPC_VPA_SLB;
823 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
824 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
825 if (ret < 0) {
826 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
827 return ret;
828 }
829
830 assert((uintptr_t)&spapr_cpu->dtl_size
831 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
832 reg.id = KVM_REG_PPC_VPA_DTL;
833 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
834 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
835 if (ret < 0) {
836 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
837 strerror(errno));
838 return ret;
839 }
840
841 if (!spapr_cpu->vpa_addr) {
842 reg.id = KVM_REG_PPC_VPA_ADDR;
843 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
844 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
845 if (ret < 0) {
846 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
847 return ret;
848 }
849 }
850
851 return 0;
852 }
853 #endif /* TARGET_PPC64 */
854
855 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
856 {
857 CPUPPCState *env = &cpu->env;
858 struct kvm_sregs sregs;
859 int i;
860
861 sregs.pvr = env->spr[SPR_PVR];
862
863 if (cpu->vhyp) {
864 PPCVirtualHypervisorClass *vhc =
865 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
866 sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
867 } else {
868 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
869 }
870
871 /* Sync SLB */
872 #ifdef TARGET_PPC64
873 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
874 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
875 if (env->slb[i].esid & SLB_ESID_V) {
876 sregs.u.s.ppc64.slb[i].slbe |= i;
877 }
878 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
879 }
880 #endif
881
882 /* Sync SRs */
883 for (i = 0; i < 16; i++) {
884 sregs.u.s.ppc32.sr[i] = env->sr[i];
885 }
886
887 /* Sync BATs */
888 for (i = 0; i < 8; i++) {
889         /* Beware. We have to swap the upper and lower 32-bit halves here */
890 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
891 | env->DBAT[1][i];
892 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
893 | env->IBAT[1][i];
894 }
895
896 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
897 }
898
899 int kvm_arch_put_registers(CPUState *cs, int level)
900 {
901 PowerPCCPU *cpu = POWERPC_CPU(cs);
902 CPUPPCState *env = &cpu->env;
903 struct kvm_regs regs;
904 int ret;
905 int i;
906
907 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
908 if (ret < 0) {
909 return ret;
910 }
911
912 regs.ctr = env->ctr;
913 regs.lr = env->lr;
914 regs.xer = cpu_read_xer(env);
915 regs.msr = env->msr;
916 regs.pc = env->nip;
917
918 regs.srr0 = env->spr[SPR_SRR0];
919 regs.srr1 = env->spr[SPR_SRR1];
920
921 regs.sprg0 = env->spr[SPR_SPRG0];
922 regs.sprg1 = env->spr[SPR_SPRG1];
923 regs.sprg2 = env->spr[SPR_SPRG2];
924 regs.sprg3 = env->spr[SPR_SPRG3];
925 regs.sprg4 = env->spr[SPR_SPRG4];
926 regs.sprg5 = env->spr[SPR_SPRG5];
927 regs.sprg6 = env->spr[SPR_SPRG6];
928 regs.sprg7 = env->spr[SPR_SPRG7];
929
930 regs.pid = env->spr[SPR_BOOKE_PID];
931
932     for (i = 0; i < 32; i++)
933 regs.gpr[i] = env->gpr[i];
934
935 regs.cr = 0;
936 for (i = 0; i < 8; i++) {
937 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
938 }
939
940 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
941 if (ret < 0)
942 return ret;
943
944 kvm_put_fp(cs);
945
946 if (env->tlb_dirty) {
947 kvm_sw_tlb_put(cpu);
948 env->tlb_dirty = false;
949 }
950
951 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
952 ret = kvmppc_put_books_sregs(cpu);
953 if (ret < 0) {
954 return ret;
955 }
956 }
957
958 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
959 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
960 }
961
962 if (cap_one_reg) {
963 int i;
964
965         /* We deliberately ignore errors here: for kernels which have
966          * the ONE_REG calls but don't support the specific
967 * registers, there's a reasonable chance things will still
968 * work, at least until we try to migrate. */
969 for (i = 0; i < 1024; i++) {
970 uint64_t id = env->spr_cb[i].one_reg_id;
971
972 if (id != 0) {
973 kvm_put_one_spr(cs, id, i);
974 }
975 }
976
977 #ifdef TARGET_PPC64
978 if (msr_ts) {
979 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
980 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
981 }
982 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
983 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
984 }
985 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
986 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
987 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
988 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
989 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
990 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
991 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
992 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
993 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
994 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
995 }
996
997 if (cap_papr) {
998 if (kvm_put_vpa(cs) < 0) {
999 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1000 }
1001 }
1002
1003 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1004 #endif /* TARGET_PPC64 */
1005 }
1006
1007 return ret;
1008 }
1009
1010 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1011 {
1012 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1013 }
1014
1015 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1016 {
1017 CPUPPCState *env = &cpu->env;
1018 struct kvm_sregs sregs;
1019 int ret;
1020
1021 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1022 if (ret < 0) {
1023 return ret;
1024 }
1025
1026 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1027 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1028 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1029 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1030 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1031 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1032 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1033 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1034 env->spr[SPR_DECR] = sregs.u.e.dec;
1035 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1036 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1037 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1038 }
1039
1040 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1041 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1042 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1043 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1044 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1045 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1046 }
1047
1048 if (sregs.u.e.features & KVM_SREGS_E_64) {
1049 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1050 }
1051
1052 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1053 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1054 }
1055
1056 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1057 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1058 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1059 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1060 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1061 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1062 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1063 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1064 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1065 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1066 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1067 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1068 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1069 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1070 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1071 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1072 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1073 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1074 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1075 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1076 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1077 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1078 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1079 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1080 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1081 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1082 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1083 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1084 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1085 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1086 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1087 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1088 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1089
1090 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1091 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1092 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1093 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1094 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1095 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1096 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1097 }
1098
1099 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1100 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1101 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1102 }
1103
1104 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1105 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1106 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1107 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1108 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1109 }
1110 }
1111
1112 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1113 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1114 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1115 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1116 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1117 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1118 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1119 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1120 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1121 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1122 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1123 }
1124
1125 if (sregs.u.e.features & KVM_SREGS_EXP) {
1126 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1127 }
1128
1129 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1130 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1131 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1132 }
1133
1134 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1135 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1136 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1137 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1138
1139 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1140 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1141 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1142 }
1143 }
1144
1145 return 0;
1146 }
1147
1148 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1149 {
1150 CPUPPCState *env = &cpu->env;
1151 struct kvm_sregs sregs;
1152 int ret;
1153 int i;
1154
1155 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1156 if (ret < 0) {
1157 return ret;
1158 }
1159
1160 if (!cpu->vhyp) {
1161 ppc_store_sdr1(env, sregs.u.s.sdr1);
1162 }
1163
1164 /* Sync SLB */
1165 #ifdef TARGET_PPC64
1166 /*
1167 * The packed SLB array we get from KVM_GET_SREGS only contains
1168 * information about valid entries. So we flush our internal copy
1169 * to get rid of stale ones, then put all valid SLB entries back
1170 * in.
1171 */
1172 memset(env->slb, 0, sizeof(env->slb));
1173 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1174 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1175 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1176 /*
1177 * Only restore valid entries
1178 */
1179 if (rb & SLB_ESID_V) {
1180 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1181 }
1182 }
1183 #endif
1184
1185 /* Sync SRs */
1186 for (i = 0; i < 16; i++) {
1187 env->sr[i] = sregs.u.s.ppc32.sr[i];
1188 }
1189
1190 /* Sync BATs */
1191 for (i = 0; i < 8; i++) {
1192 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1193 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1194 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1195 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1196 }
1197
1198 return 0;
1199 }
1200
1201 int kvm_arch_get_registers(CPUState *cs)
1202 {
1203 PowerPCCPU *cpu = POWERPC_CPU(cs);
1204 CPUPPCState *env = &cpu->env;
1205 struct kvm_regs regs;
1206 uint32_t cr;
1207 int i, ret;
1208
1209 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1210 if (ret < 0)
1211 return ret;
1212
1213 cr = regs.cr;
1214 for (i = 7; i >= 0; i--) {
1215 env->crf[i] = cr & 15;
1216 cr >>= 4;
1217 }
1218
1219 env->ctr = regs.ctr;
1220 env->lr = regs.lr;
1221 cpu_write_xer(env, regs.xer);
1222 env->msr = regs.msr;
1223 env->nip = regs.pc;
1224
1225 env->spr[SPR_SRR0] = regs.srr0;
1226 env->spr[SPR_SRR1] = regs.srr1;
1227
1228 env->spr[SPR_SPRG0] = regs.sprg0;
1229 env->spr[SPR_SPRG1] = regs.sprg1;
1230 env->spr[SPR_SPRG2] = regs.sprg2;
1231 env->spr[SPR_SPRG3] = regs.sprg3;
1232 env->spr[SPR_SPRG4] = regs.sprg4;
1233 env->spr[SPR_SPRG5] = regs.sprg5;
1234 env->spr[SPR_SPRG6] = regs.sprg6;
1235 env->spr[SPR_SPRG7] = regs.sprg7;
1236
1237 env->spr[SPR_BOOKE_PID] = regs.pid;
1238
1239     for (i = 0; i < 32; i++)
1240 env->gpr[i] = regs.gpr[i];
1241
1242 kvm_get_fp(cs);
1243
1244 if (cap_booke_sregs) {
1245 ret = kvmppc_get_booke_sregs(cpu);
1246 if (ret < 0) {
1247 return ret;
1248 }
1249 }
1250
1251 if (cap_segstate) {
1252 ret = kvmppc_get_books_sregs(cpu);
1253 if (ret < 0) {
1254 return ret;
1255 }
1256 }
1257
1258 if (cap_hior) {
1259 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1260 }
1261
1262 if (cap_one_reg) {
1263 int i;
1264
1265         /* We deliberately ignore errors here: for kernels which have
1266          * the ONE_REG calls but don't support the specific
1267 * registers, there's a reasonable chance things will still
1268 * work, at least until we try to migrate. */
1269 for (i = 0; i < 1024; i++) {
1270 uint64_t id = env->spr_cb[i].one_reg_id;
1271
1272 if (id != 0) {
1273 kvm_get_one_spr(cs, id, i);
1274 }
1275 }
1276
1277 #ifdef TARGET_PPC64
1278 if (msr_ts) {
1279 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1280 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1281 }
1282 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1283 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1284 }
1285 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1286 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1287 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1288 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1289 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1290 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1291 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1292 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1293 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1294 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1295 }
1296
1297 if (cap_papr) {
1298 if (kvm_get_vpa(cs) < 0) {
1299 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1300 }
1301 }
1302
1303 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1304 #endif
1305 }
1306
1307 return 0;
1308 }
1309
1310 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1311 {
1312 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1313
1314 if (irq != PPC_INTERRUPT_EXT) {
1315 return 0;
1316 }
1317
1318 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1319 return 0;
1320 }
1321
1322 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1323
1324 return 0;
1325 }
1326
1327 #if defined(TARGET_PPC64)
1328 #define PPC_INPUT_INT PPC970_INPUT_INT
1329 #else
1330 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1331 #endif
1332
1333 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1334 {
1335 PowerPCCPU *cpu = POWERPC_CPU(cs);
1336 CPUPPCState *env = &cpu->env;
1337 int r;
1338 unsigned irq;
1339
1340 qemu_mutex_lock_iothread();
1341
1342 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1343 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1344 if (!cap_interrupt_level &&
1345 run->ready_for_interrupt_injection &&
1346 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1347 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1348 {
1349 /* For now KVM disregards the 'irq' argument. However, in the
1350 * future KVM could cache it in-kernel to avoid a heavyweight exit
1351 * when reading the UIC.
1352 */
1353 irq = KVM_INTERRUPT_SET;
1354
1355 DPRINTF("injected interrupt %d\n", irq);
1356 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1357 if (r < 0) {
1358 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1359 }
1360
1361 /* Always wake up soon in case the interrupt was level based */
1362 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1363 (NANOSECONDS_PER_SECOND / 50));
1364 }
1365
1366 /* We don't know if there are more interrupts pending after this. However,
1367 * the guest will return to userspace in the course of handling this one
1368      * anyway, so we will get a chance to deliver the rest. */
1369
1370 qemu_mutex_unlock_iothread();
1371 }
1372
1373 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1374 {
1375 return MEMTXATTRS_UNSPECIFIED;
1376 }
1377
1378 int kvm_arch_process_async_events(CPUState *cs)
1379 {
1380 return cs->halted;
1381 }
1382
1383 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1384 {
1385 CPUState *cs = CPU(cpu);
1386 CPUPPCState *env = &cpu->env;
1387
1388 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1389 cs->halted = 1;
1390 cs->exception_index = EXCP_HLT;
1391 }
1392
1393 return 0;
1394 }
1395
1396 /* map dcr access to existing qemu dcr emulation */
1397 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1398 {
1399 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1400 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1401
1402 return 0;
1403 }
1404
1405 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1406 {
1407 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1408 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1409
1410 return 0;
1411 }
1412
1413 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1414 {
1415 /* Mixed endian case is not handled */
1416 uint32_t sc = debug_inst_opcode;
1417
1418 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1419 sizeof(sc), 0) ||
1420 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1421 return -EINVAL;
1422 }
1423
1424 return 0;
1425 }
1426
1427 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1428 {
1429 uint32_t sc;
1430
1431 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1432 sc != debug_inst_opcode ||
1433 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1434 sizeof(sc), 1)) {
1435 return -EINVAL;
1436 }
1437
1438 return 0;
1439 }
1440
1441 static int find_hw_breakpoint(target_ulong addr, int type)
1442 {
1443 int n;
1444
1445 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1446 <= ARRAY_SIZE(hw_debug_points));
1447
1448 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1449 if (hw_debug_points[n].addr == addr &&
1450 hw_debug_points[n].type == type) {
1451 return n;
1452 }
1453 }
1454
1455 return -1;
1456 }
1457
1458 static int find_hw_watchpoint(target_ulong addr, int *flag)
1459 {
1460 int n;
1461
1462 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1463 if (n >= 0) {
1464 *flag = BP_MEM_ACCESS;
1465 return n;
1466 }
1467
1468 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1469 if (n >= 0) {
1470 *flag = BP_MEM_WRITE;
1471 return n;
1472 }
1473
1474 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1475 if (n >= 0) {
1476 *flag = BP_MEM_READ;
1477 return n;
1478 }
1479
1480 return -1;
1481 }
1482
1483 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1484 target_ulong len, int type)
1485 {
1486 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1487 return -ENOBUFS;
1488 }
1489
1490 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1491 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1492
1493 switch (type) {
1494 case GDB_BREAKPOINT_HW:
1495 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1496 return -ENOBUFS;
1497 }
1498
1499 if (find_hw_breakpoint(addr, type) >= 0) {
1500 return -EEXIST;
1501 }
1502
1503 nb_hw_breakpoint++;
1504 break;
1505
1506 case GDB_WATCHPOINT_WRITE:
1507 case GDB_WATCHPOINT_READ:
1508 case GDB_WATCHPOINT_ACCESS:
1509 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1510 return -ENOBUFS;
1511 }
1512
1513 if (find_hw_breakpoint(addr, type) >= 0) {
1514 return -EEXIST;
1515 }
1516
1517 nb_hw_watchpoint++;
1518 break;
1519
1520 default:
1521 return -ENOSYS;
1522 }
1523
1524 return 0;
1525 }
1526
1527 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1528 target_ulong len, int type)
1529 {
1530 int n;
1531
1532 n = find_hw_breakpoint(addr, type);
1533 if (n < 0) {
1534 return -ENOENT;
1535 }
1536
1537 switch (type) {
1538 case GDB_BREAKPOINT_HW:
1539 nb_hw_breakpoint--;
1540 break;
1541
1542 case GDB_WATCHPOINT_WRITE:
1543 case GDB_WATCHPOINT_READ:
1544 case GDB_WATCHPOINT_ACCESS:
1545 nb_hw_watchpoint--;
1546 break;
1547
1548 default:
1549 return -ENOSYS;
1550 }
1551 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1552
1553 return 0;
1554 }
1555
1556 void kvm_arch_remove_all_hw_breakpoints(void)
1557 {
1558 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1559 }
1560
1561 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1562 {
1563 int n;
1564
1565 /* Software Breakpoint updates */
1566 if (kvm_sw_breakpoints_active(cs)) {
1567 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1568 }
1569
1570 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1571 <= ARRAY_SIZE(hw_debug_points));
1572 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1573
1574 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1575 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1576 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1577 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1578 switch (hw_debug_points[n].type) {
1579 case GDB_BREAKPOINT_HW:
1580 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1581 break;
1582 case GDB_WATCHPOINT_WRITE:
1583 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1584 break;
1585 case GDB_WATCHPOINT_READ:
1586 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1587 break;
1588 case GDB_WATCHPOINT_ACCESS:
1589 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1590 KVMPPC_DEBUG_WATCH_READ;
1591 break;
1592 default:
1593 cpu_abort(cs, "Unsupported breakpoint type\n");
1594 }
1595 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1596 }
1597 }
1598 }
1599
1600 static int kvm_handle_hw_breakpoint(CPUState *cs,
1601 struct kvm_debug_exit_arch *arch_info)
1602 {
1603 int handle = 0;
1604 int n;
1605 int flag = 0;
1606
1607 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1608 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1609 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1610 if (n >= 0) {
1611 handle = 1;
1612 }
1613 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1614 KVMPPC_DEBUG_WATCH_WRITE)) {
1615 n = find_hw_watchpoint(arch_info->address, &flag);
1616 if (n >= 0) {
1617 handle = 1;
1618 cs->watchpoint_hit = &hw_watchpoint;
1619 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1620 hw_watchpoint.flags = flag;
1621 }
1622 }
1623 }
1624 return handle;
1625 }
1626
1627 static int kvm_handle_singlestep(void)
1628 {
1629 return 1;
1630 }
1631
1632 static int kvm_handle_sw_breakpoint(void)
1633 {
1634 return 1;
1635 }
1636
1637 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1638 {
1639 CPUState *cs = CPU(cpu);
1640 CPUPPCState *env = &cpu->env;
1641 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1642
1643 if (cs->singlestep_enabled) {
1644 return kvm_handle_singlestep();
1645 }
1646
1647 if (arch_info->status) {
1648 return kvm_handle_hw_breakpoint(cs, arch_info);
1649 }
1650
1651 if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1652 return kvm_handle_sw_breakpoint();
1653 }
1654
1655 /*
1656      * QEMU is not able to handle the debug exception, so inject a
1657      * program exception into the guest;
1658      * Yes, a program exception, NOT a debug exception !!
1659      * When QEMU is using the debug resources, the debug exception must
1660      * always be set. To achieve this we set MSR_DE and also set
1661      * MSRP_DEP so the guest cannot change MSR_DE.
1662      * When emulating debug resources for the guest we want the guest
1663      * to control MSR_DE (enable/disable the debug interrupt on demand).
1664      * Supporting both configurations at once is NOT possible.
1665      * So the result is that we cannot share debug resources
1666      * between QEMU and the guest on the BOOKE architecture.
1667      * In the current design QEMU gets priority over the guest:
1668      * if QEMU is using the debug resources then the guest
1669      * cannot use them;
1670      * For software breakpoints QEMU uses a privileged instruction;
1671      * So there is no way we got here because the guest raised a
1672      * debug exception; the only possibility is that the guest executed
1673      * a privileged / illegal instruction and that's why we are
1674      * injecting a program interrupt.
1675 */
1676 cpu_synchronize_state(cs);
1677 /*
1678 * env->nip is PC, so increment this by 4 to use
1679      * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1680 */
1681 env->nip += 4;
1682 cs->exception_index = POWERPC_EXCP_PROGRAM;
1683 env->error_code = POWERPC_EXCP_INVAL;
1684 ppc_cpu_do_interrupt(cs);
1685
1686 return 0;
1687 }
1688
1689 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1690 {
1691 PowerPCCPU *cpu = POWERPC_CPU(cs);
1692 CPUPPCState *env = &cpu->env;
1693 int ret;
1694
1695 qemu_mutex_lock_iothread();
1696
1697 switch (run->exit_reason) {
1698 case KVM_EXIT_DCR:
1699 if (run->dcr.is_write) {
1700 DPRINTF("handle dcr write\n");
1701 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1702 } else {
1703 DPRINTF("handle dcr read\n");
1704 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1705 }
1706 break;
1707 case KVM_EXIT_HLT:
1708 DPRINTF("handle halt\n");
1709 ret = kvmppc_handle_halt(cpu);
1710 break;
1711 #if defined(TARGET_PPC64)
1712 case KVM_EXIT_PAPR_HCALL:
1713 DPRINTF("handle PAPR hypercall\n");
1714 run->papr_hcall.ret = spapr_hypercall(cpu,
1715 run->papr_hcall.nr,
1716 run->papr_hcall.args);
1717 ret = 0;
1718 break;
1719 #endif
1720 case KVM_EXIT_EPR:
1721 DPRINTF("handle epr\n");
1722 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1723 ret = 0;
1724 break;
1725 case KVM_EXIT_WATCHDOG:
1726 DPRINTF("handle watchdog expiry\n");
1727 watchdog_perform_action();
1728 ret = 0;
1729 break;
1730
1731 case KVM_EXIT_DEBUG:
1732 DPRINTF("handle debug exception\n");
1733 if (kvm_handle_debug(cpu, run)) {
1734 ret = EXCP_DEBUG;
1735 break;
1736 }
1737 /* re-enter, this exception was guest-internal */
1738 ret = 0;
1739 break;
1740
1741 default:
1742 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1743 ret = -1;
1744 break;
1745 }
1746
1747 qemu_mutex_unlock_iothread();
1748 return ret;
1749 }
1750
1751 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1752 {
1753 CPUState *cs = CPU(cpu);
1754 uint32_t bits = tsr_bits;
1755 struct kvm_one_reg reg = {
1756 .id = KVM_REG_PPC_OR_TSR,
1757 .addr = (uintptr_t) &bits,
1758 };
1759
1760 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1761 }
1762
1763 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1764 {
1765
1766 CPUState *cs = CPU(cpu);
1767 uint32_t bits = tsr_bits;
1768 struct kvm_one_reg reg = {
1769 .id = KVM_REG_PPC_CLEAR_TSR,
1770 .addr = (uintptr_t) &bits,
1771 };
1772
1773 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1774 }
1775
1776 int kvmppc_set_tcr(PowerPCCPU *cpu)
1777 {
1778 CPUState *cs = CPU(cpu);
1779 CPUPPCState *env = &cpu->env;
1780 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1781
1782 struct kvm_one_reg reg = {
1783 .id = KVM_REG_PPC_TCR,
1784 .addr = (uintptr_t) &tcr,
1785 };
1786
1787 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1788 }
1789
1790 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1791 {
1792 CPUState *cs = CPU(cpu);
1793 int ret;
1794
1795 if (!kvm_enabled()) {
1796 return -1;
1797 }
1798
1799 if (!cap_ppc_watchdog) {
1800 printf("warning: KVM does not support watchdog");
1801 return -1;
1802 }
1803
1804 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1805 if (ret < 0) {
1806 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1807 __func__, strerror(-ret));
1808 return ret;
1809 }
1810
1811 return ret;
1812 }
1813
1814 static int read_cpuinfo(const char *field, char *value, int len)
1815 {
1816 FILE *f;
1817 int ret = -1;
1818 int field_len = strlen(field);
1819 char line[512];
1820
1821 f = fopen("/proc/cpuinfo", "r");
1822 if (!f) {
1823 return -1;
1824 }
1825
1826 do {
1827 if (!fgets(line, sizeof(line), f)) {
1828 break;
1829 }
1830 if (!strncmp(line, field, field_len)) {
1831 pstrcpy(value, len, line);
1832 ret = 0;
1833 break;
1834 }
1835     } while (*line);
1836
1837 fclose(f);
1838
1839 return ret;
1840 }
1841
1842 uint32_t kvmppc_get_tbfreq(void)
1843 {
1844 char line[512];
1845 char *ns;
1846 uint32_t retval = NANOSECONDS_PER_SECOND;
1847
1848 if (read_cpuinfo("timebase", line, sizeof(line))) {
1849 return retval;
1850 }
1851
1852 if (!(ns = strchr(line, ':'))) {
1853 return retval;
1854 }
1855
1856 ns++;
1857
1858 return atoi(ns);
1859 }
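/*
 * For reference, the "timebase" line parsed above typically looks like
 * the following on a ppc64 host (the exact value is host-dependent and
 * shown here only as an illustration):
 *
 *     timebase        : 512000000
 *
 * so atoi() on the text after the ':' yields the timebase frequency in Hz.
 */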
1860
1861 bool kvmppc_get_host_serial(char **value)
1862 {
1863 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1864 NULL);
1865 }
1866
1867 bool kvmppc_get_host_model(char **value)
1868 {
1869 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1870 }
1871
1872 /* Try to find a device tree node for a CPU with clock-frequency property */
1873 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1874 {
1875 struct dirent *dirp;
1876 DIR *dp;
1877
1878 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1879 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1880 return -1;
1881 }
1882
1883 buf[0] = '\0';
1884 while ((dirp = readdir(dp)) != NULL) {
1885 FILE *f;
1886 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1887 dirp->d_name);
1888 f = fopen(buf, "r");
1889 if (f) {
1890 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1891 fclose(f);
1892 break;
1893 }
1894 buf[0] = '\0';
1895 }
1896 closedir(dp);
1897 if (buf[0] == '\0') {
1898 printf("Unknown host!\n");
1899 return -1;
1900 }
1901
1902 return 0;
1903 }
1904
1905 static uint64_t kvmppc_read_int_dt(const char *filename)
1906 {
1907 union {
1908 uint32_t v32;
1909 uint64_t v64;
1910 } u;
1911 FILE *f;
1912 int len;
1913
1914 f = fopen(filename, "rb");
1915 if (!f) {
1916 return -1;
1917 }
1918
1919 len = fread(&u, 1, sizeof(u), f);
1920 fclose(f);
1921 switch (len) {
1922 case 4:
1923 /* property is a 32-bit quantity */
1924 return be32_to_cpu(u.v32);
1925 case 8:
1926 return be64_to_cpu(u.v64);
1927 }
1928
1929 return 0;
1930 }
1931
1932 /* Read a CPU node property from the host device tree that's a single
1933 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1934 * (can't find or open the property, or doesn't understand the
1935 * format) */
1936 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1937 {
1938 char buf[PATH_MAX], *tmp;
1939 uint64_t val;
1940
1941 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1942 return -1;
1943 }
1944
1945 tmp = g_strdup_printf("%s/%s", buf, propname);
1946 val = kvmppc_read_int_dt(tmp);
1947 g_free(tmp);
1948
1949 return val;
1950 }
1951
1952 uint64_t kvmppc_get_clockfreq(void)
1953 {
1954 return kvmppc_read_int_cpu_dt("clock-frequency");
1955 }
1956
1957 static int kvmppc_get_dec_bits(void)
1958 {
1959 int nr_bits = kvmppc_read_int_cpu_dt("ibm,dec-bits");
1960
1961 if (nr_bits > 0) {
1962 return nr_bits;
1963 }
1964 return 0;
1965 }
1966
1967 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1968 {
1969 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1970 CPUState *cs = CPU(cpu);
1971
1972 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1973 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1974 return 0;
1975 }
1976
1977 return 1;
1978 }
1979
1980 int kvmppc_get_hasidle(CPUPPCState *env)
1981 {
1982 struct kvm_ppc_pvinfo pvinfo;
1983
1984 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1985 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1986 return 1;
1987 }
1988
1989 return 0;
1990 }
1991
1992 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1993 {
1994 uint32_t *hc = (uint32_t*)buf;
1995 struct kvm_ppc_pvinfo pvinfo;
1996
1997 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1998 memcpy(buf, pvinfo.hcall, buf_len);
1999 return 0;
2000 }
2001
2002 /*
2003 * Fall back to a hypercall sequence that always fails, whatever the
2004 * guest's endianness:
2005 * tdi 0,r0,72 (becomes b .+8 in the wrong endianness, nop in the right one)
2006 * li r3, -1
2007 * b .+8 (becomes nop in the wrong endianness)
2008 * bswap32(li r3, -1)
2009 */
2010
2011 hc[0] = cpu_to_be32(0x08000048);
2012 hc[1] = cpu_to_be32(0x3860ffff);
2013 hc[2] = cpu_to_be32(0x48000008);
2014 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2015
2016 return 1;
2017 }
2018
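/* Ask KVM to handle the given hcall in the kernel (KVM_CAP_PPC_ENABLE_HCALL) */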
2019 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2020 {
2021 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2022 }
2023
2024 void kvmppc_enable_logical_ci_hcalls(void)
2025 {
2026 /*
2027 * FIXME: it would be nice if we could detect the case where we're
2028 * using a device that requires the in-kernel implementation of
2029 * these hcalls but the kernel lacks it, and produce a warning in
2030 * that case.
2031 */
2032 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2033 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2034 }
2035
2036 void kvmppc_enable_set_mode_hcall(void)
2037 {
2038 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2039 }
2040
2041 void kvmppc_enable_clear_ref_mod_hcalls(void)
2042 {
2043 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2044 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2045 }
2046
2047 void kvmppc_enable_h_page_init(void)
2048 {
2049 kvmppc_enable_hcall(kvm_state, H_PAGE_INIT);
2050 }
2051
2052 void kvmppc_set_papr(PowerPCCPU *cpu)
2053 {
2054 CPUState *cs = CPU(cpu);
2055 int ret;
2056
2057 if (!kvm_enabled()) {
2058 return;
2059 }
2060
2061 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2062 if (ret) {
2063 error_report("This vCPU type or KVM version does not support PAPR");
2064 exit(1);
2065 }
2066
2067 /* Update the capability flag so we sync the right information
2068 * with kvm */
2069 cap_papr = 1;
2070 }
2071
2072 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2073 {
2074 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2075 }
2076
2077 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2078 {
2079 CPUState *cs = CPU(cpu);
2080 int ret;
2081
2082 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2083 if (ret && mpic_proxy) {
2084 error_report("This KVM version does not support EPR");
2085 exit(1);
2086 }
2087 }
2088
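/* Number of SMT threads per core supported by the host KVM (1 if the
 * capability wasn't advertised) */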
2089 int kvmppc_smt_threads(void)
2090 {
2091 return cap_ppc_smt ? cap_ppc_smt : 1;
2092 }
2093
2094 int kvmppc_set_smt_threads(int smt)
2095 {
2096 int ret;
2097
2098 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2099 if (!ret) {
2100 cap_ppc_smt = smt;
2101 }
2102 return ret;
2103 }
2104
2105 void kvmppc_hint_smt_possible(Error **errp)
2106 {
2107 int i;
2108 GString *g;
2109 char *s;
2110
2111 assert(kvm_enabled());
2112 if (cap_ppc_smt_possible) {
2113 g = g_string_new("Available VSMT modes:");
2114 for (i = 63; i >= 0; i--) {
2115 if ((1UL << i) & cap_ppc_smt_possible) {
2116 g_string_append_printf(g, " %lu", (1UL << i));
2117 }
2118 }
2119 s = g_string_free(g, false);
2120 error_append_hint(errp, "%s.\n", s);
2121 g_free(s);
2122 } else {
2123 error_append_hint(errp,
2124 "This KVM seems to be too old to support VSMT.\n");
2125 }
2126 }
2127
2128
2129 #ifdef TARGET_PPC64
2130 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2131 {
2132 struct kvm_ppc_smmu_info info;
2133 long rampagesize, best_page_shift;
2134 int i;
2135
2136 /* Find the largest hardware supported page size that's less than
2137 * or equal to the (logical) backing page size of guest RAM */
2138 kvm_get_smmu_info(&info, &error_fatal);
2139 rampagesize = qemu_getrampagesize();
2140 best_page_shift = 0;
2141
2142 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2143 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2144
2145 if (!sps->page_shift) {
2146 continue;
2147 }
2148
2149 if ((sps->page_shift > best_page_shift)
2150 && ((1UL << sps->page_shift) <= rampagesize)) {
2151 best_page_shift = sps->page_shift;
2152 }
2153 }
2154
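/*
 * 2^(hash_shift - 7) is the number of 128-byte HPTE groups in the hash
 * table, so this caps the RMA at one backing page per HPTE group
 * (presumably a conservative bound so the RMA can always be mapped by
 * the HPT).
 */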
2155 return MIN(current_size,
2156 1ULL << (best_page_shift + hash_shift - 7));
2157 }
2158 #endif
2159
2160 bool kvmppc_spapr_use_multitce(void)
2161 {
2162 return cap_spapr_multitce;
2163 }
2164
2165 int kvmppc_spapr_enable_inkernel_multitce(void)
2166 {
2167 int ret;
2168
2169 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2170 H_PUT_TCE_INDIRECT, 1);
2171 if (!ret) {
2172 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2173 H_STUFF_TCE, 1);
2174 }
2175
2176 return ret;
2177 }
2178
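/*
 * Create an in-kernel TCE (IOMMU) table for the given LIOBN, preferring
 * the 64-bit ioctl when available, and mmap it so QEMU can access the
 * entries directly. On success *pfd holds the table fd and the mapping
 * is returned; returns NULL (with *pfd == -1) if an in-kernel table
 * can't be used.
 */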
2179 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2180 uint64_t bus_offset, uint32_t nb_table,
2181 int *pfd, bool need_vfio)
2182 {
2183 long len;
2184 int fd;
2185 void *table;
2186
2187 /* Must set fd to -1 so we don't try to munmap when called for
2188 * destroying the table, which the upper layers -will- do
2189 */
2190 *pfd = -1;
2191 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2192 return NULL;
2193 }
2194
2195 if (cap_spapr_tce_64) {
2196 struct kvm_create_spapr_tce_64 args = {
2197 .liobn = liobn,
2198 .page_shift = page_shift,
2199 .offset = bus_offset >> page_shift,
2200 .size = nb_table,
2201 .flags = 0
2202 };
2203 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2204 if (fd < 0) {
2205 fprintf(stderr,
2206 "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2207 liobn);
2208 return NULL;
2209 }
2210 } else if (cap_spapr_tce) {
2211 uint64_t window_size = (uint64_t) nb_table << page_shift;
2212 struct kvm_create_spapr_tce args = {
2213 .liobn = liobn,
2214 .window_size = window_size,
2215 };
2216 if ((window_size != args.window_size) || bus_offset) {
2217 return NULL;
2218 }
2219 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2220 if (fd < 0) {
2221 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2222 liobn);
2223 return NULL;
2224 }
2225 } else {
2226 return NULL;
2227 }
2228
2229 len = nb_table * sizeof(uint64_t);
2230 /* FIXME: round this up to page size */
2231
2232 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2233 if (table == MAP_FAILED) {
2234 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2235 liobn);
2236 close(fd);
2237 return NULL;
2238 }
2239
2240 *pfd = fd;
2241 return table;
2242 }
2243
2244 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2245 {
2246 long len;
2247
2248 if (fd < 0) {
2249 return -1;
2250 }
2251
2252 len = nb_table * sizeof(uint64_t);
2253 if ((munmap(table, len) < 0) ||
2254 (close(fd) < 0)) {
2255 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2256 strerror(errno));
2257 /* Leak the table */
2258 }
2259
2260 return 0;
2261 }
2262
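/*
 * Allocate or reset the guest hash page table. Returns 0 if QEMU should
 * allocate the HPT itself (full emulation or PR KVM), the shift (log2
 * size) of a kernel-allocated HPT, or a negative errno on failure.
 */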
2263 int kvmppc_reset_htab(int shift_hint)
2264 {
2265 uint32_t shift = shift_hint;
2266
2267 if (!kvm_enabled()) {
2268 /* Full emulation, tell caller to allocate htab itself */
2269 return 0;
2270 }
2271 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2272 int ret;
2273 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2274 if (ret == -ENOTTY) {
2275 /* At least some versions of PR KVM advertise the
2276 * capability, but don't implement the ioctl(). Oops.
2277 * Return 0 so that we allocate the htab in qemu, as is
2278 * correct for PR. */
2279 return 0;
2280 } else if (ret < 0) {
2281 return ret;
2282 }
2283 return shift;
2284 }
2285
2286 /* We have a kernel that predates the htab reset calls. For PR
2287 * KVM, we need to allocate the htab ourselves; an HV KVM of this
2288 * era will already have allocated a fixed 16MB hash table. */
2289 if (kvmppc_is_pr(kvm_state)) {
2290 /* PR - tell caller to allocate htab */
2291 return 0;
2292 } else {
2293 /* HV - assume 16MB kernel allocated htab */
2294 return 24;
2295 }
2296 }
2297
2298 static inline uint32_t mfpvr(void)
2299 {
2300 uint32_t pvr;
2301
2302 asm ("mfpvr %0"
2303 : "=r"(pvr));
2304 return pvr;
2305 }
2306
2307 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2308 {
2309 if (on) {
2310 *word |= flags;
2311 } else {
2312 *word &= ~flags;
2313 }
2314 }
2315
2316 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2317 {
2318 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2319 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2320 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2321
2322 /* Now fix up the class with information we can query from the host */
2323 pcc->pvr = mfpvr();
2324
2325 alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2326 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2327 alter_insns(&pcc->insns_flags2, PPC2_VSX,
2328 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2329 alter_insns(&pcc->insns_flags2, PPC2_DFP,
2330 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2331
2332 if (dcache_size != -1) {
2333 pcc->l1_dcache_size = dcache_size;
2334 }
2335
2336 if (icache_size != -1) {
2337 pcc->l1_icache_size = icache_size;
2338 }
2339
2340 #if defined(TARGET_PPC64)
2341 pcc->radix_page_info = kvm_get_radix_page_info();
2342
2343 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2344 /*
2345 * POWER9 DD1 has some bugs which make it not really ISA 3.00
2346 * compliant. More importantly, advertising ISA 3.00
2347 * architected mode may prevent guests from activating
2348 * necessary DD1 workarounds.
2349 */
2350 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2351 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2352 }
2353 #endif /* defined(TARGET_PPC64) */
2354 }
2355
2356 bool kvmppc_has_cap_epr(void)
2357 {
2358 return cap_epr;
2359 }
2360
2361 bool kvmppc_has_cap_fixup_hcalls(void)
2362 {
2363 return cap_fixup_hcalls;
2364 }
2365
2366 bool kvmppc_has_cap_htm(void)
2367 {
2368 return cap_htm;
2369 }
2370
2371 bool kvmppc_has_cap_mmu_radix(void)
2372 {
2373 return cap_mmu_radix;
2374 }
2375
2376 bool kvmppc_has_cap_mmu_hash_v3(void)
2377 {
2378 return cap_mmu_hash_v3;
2379 }
2380
2381 static bool kvmppc_power8_host(void)
2382 {
2383 bool ret = false;
2384 #ifdef TARGET_PPC64
2385 {
2386 uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
2387 ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
2388 (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
2389 (base_pvr == CPU_POWERPC_POWER8_BASE);
2390 }
2391 #endif /* TARGET_PPC64 */
2392 return ret;
2393 }
2394
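/*
 * The parse_cap_ppc_* helpers below translate the bits returned by
 * KVM_PPC_GET_CPU_CHAR into spapr capability levels (0 = broken,
 * 1 = workaround available, 2 = fixed), matching the SPAPR_CAP_*
 * values used explicitly in parse_cap_ppc_safe_indirect_branch().
 */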
2395 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2396 {
2397 bool l1d_thread_priv_req = !kvmppc_power8_host();
2398
2399 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2400 return 2;
2401 } else if ((!l1d_thread_priv_req ||
2402 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2403 (c.character & c.character_mask
2404 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2405 return 1;
2406 }
2407
2408 return 0;
2409 }
2410
2411 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2412 {
2413 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2414 return 2;
2415 } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2416 return 1;
2417 }
2418
2419 return 0;
2420 }
2421
2422 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2423 {
2424 if ((~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) &&
2425 (~c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) &&
2426 (~c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED)) {
2427 return SPAPR_CAP_FIXED_NA;
2428 } else if (c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) {
2429 return SPAPR_CAP_WORKAROUND;
2430 } else if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2431 return SPAPR_CAP_FIXED_CCD;
2432 } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2433 return SPAPR_CAP_FIXED_IBS;
2434 }
2435
2436 return 0;
2437 }
2438
2439 static int parse_cap_ppc_count_cache_flush_assist(struct kvm_ppc_cpu_char c)
2440 {
2441 if (c.character & c.character_mask & H_CPU_CHAR_BCCTR_FLUSH_ASSIST) {
2442 return 1;
2443 }
2444 return 0;
2445 }
2446
2447 static void kvmppc_get_cpu_characteristics(KVMState *s)
2448 {
2449 struct kvm_ppc_cpu_char c;
2450 int ret;
2451
2452 /* Assume broken */
2453 cap_ppc_safe_cache = 0;
2454 cap_ppc_safe_bounds_check = 0;
2455 cap_ppc_safe_indirect_branch = 0;
2456
2457 ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2458 if (!ret) {
2459 return;
2460 }
2461 ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2462 if (ret < 0) {
2463 return;
2464 }
2465
2466 cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2467 cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2468 cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2469 cap_ppc_count_cache_flush_assist =
2470 parse_cap_ppc_count_cache_flush_assist(c);
2471 }
2472
2473 int kvmppc_get_cap_safe_cache(void)
2474 {
2475 return cap_ppc_safe_cache;
2476 }
2477
2478 int kvmppc_get_cap_safe_bounds_check(void)
2479 {
2480 return cap_ppc_safe_bounds_check;
2481 }
2482
2483 int kvmppc_get_cap_safe_indirect_branch(void)
2484 {
2485 return cap_ppc_safe_indirect_branch;
2486 }
2487
2488 int kvmppc_get_cap_count_cache_flush_assist(void)
2489 {
2490 return cap_ppc_count_cache_flush_assist;
2491 }
2492
2493 bool kvmppc_has_cap_nested_kvm_hv(void)
2494 {
2495 return !!cap_ppc_nested_kvm_hv;
2496 }
2497
2498 int kvmppc_set_cap_nested_kvm_hv(int enable)
2499 {
2500 return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_NESTED_HV, 0, enable);
2501 }
2502
2503 bool kvmppc_has_cap_spapr_vfio(void)
2504 {
2505 return cap_spapr_vfio;
2506 }
2507
2508 int kvmppc_get_cap_large_decr(void)
2509 {
2510 return cap_large_decr;
2511 }
2512
2513 int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable)
2514 {
2515 CPUState *cs = CPU(cpu);
2516 uint64_t lpcr;
2517
2518 kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
2519 /* Do we need to modify the LPCR? */
2520 if (!!(lpcr & LPCR_LD) != !!enable) {
2521 if (enable) {
2522 lpcr |= LPCR_LD;
2523 } else {
2524 lpcr &= ~LPCR_LD;
2525 }
2526 kvm_set_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
2527 kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
2528
2529 if (!!(lpcr & LPCR_LD) != !!enable) {
2530 return -1;
2531 }
2532 }
2533
2534 return 0;
2535 }
2536
2537 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2538 {
2539 uint32_t host_pvr = mfpvr();
2540 PowerPCCPUClass *pvr_pcc;
2541
2542 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2543 if (pvr_pcc == NULL) {
2544 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2545 }
2546
2547 return pvr_pcc;
2548 }
2549
2550 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2551 {
2552 TypeInfo type_info = {
2553 .name = TYPE_HOST_POWERPC_CPU,
2554 .class_init = kvmppc_host_cpu_class_init,
2555 };
2556 MachineClass *mc = MACHINE_GET_CLASS(ms);
2557 PowerPCCPUClass *pvr_pcc;
2558 ObjectClass *oc;
2559 DeviceClass *dc;
2560 int i;
2561
2562 pvr_pcc = kvm_ppc_get_host_cpu_class();
2563 if (pvr_pcc == NULL) {
2564 return -1;
2565 }
2566 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2567 type_register(&type_info);
2568 if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2569 /* override TCG default cpu type with 'host' cpu model */
2570 mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2571 }
2572
2573 oc = object_class_by_name(type_info.name);
2574 g_assert(oc);
2575
2576 /*
2577 * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2578 * we want "POWER8" to be a "family" alias that points to the current
2579 * host CPU type, too)
2580 */
2581 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2582 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2583 if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2584 char *suffix;
2585
2586 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2587 suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2588 if (suffix) {
2589 *suffix = 0;
2590 }
2591 break;
2592 }
2593 }
2594
2595 return 0;
2596 }
2597
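/*
 * Associate an RTAS token value with a named RTAS service so that KVM
 * handles the call in the kernel; returns -ENOENT if KVM_CAP_PPC_RTAS
 * is unavailable.
 */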
2598 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2599 {
2600 struct kvm_rtas_token_args args = {
2601 .token = token,
2602 };
2603
2604 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2605 return -ENOENT;
2606 }
2607
2608 strncpy(args.name, function, sizeof(args.name));
2609
2610 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2611 }
2612
2613 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2614 {
2615 struct kvm_get_htab_fd s = {
2616 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2617 .start_index = index,
2618 };
2619 int ret;
2620
2621 if (!cap_htab_fd) {
2622 error_setg(errp, "KVM version doesn't support %s the HPT",
2623 write ? "writing" : "reading");
2624 return -ENOTSUP;
2625 }
2626
2627 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2628 if (ret < 0) {
2629 error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2630 write ? "writing" : "reading", write ? "to" : "from",
2631 strerror(errno));
2632 return -errno;
2633 }
2634
2635 return ret;
2636 }
2637
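/*
 * Stream HPT chunks from the KVM HTAB fd into the migration stream f.
 * Each chunk is a kvm_get_htab_header followed by n_valid HPTEs.
 * Returns 1 once the kernel reports the end of the table (read() == 0),
 * 0 if we stopped early because the max_ns time budget ran out.
 */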
2638 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2639 {
2640 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2641 uint8_t buf[bufsize];
2642 ssize_t rc;
2643
2644 do {
2645 rc = read(fd, buf, bufsize);
2646 if (rc < 0) {
2647 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2648 strerror(errno));
2649 return rc;
2650 } else if (rc) {
2651 uint8_t *buffer = buf;
2652 ssize_t n = rc;
2653 while (n) {
2654 struct kvm_get_htab_header *head =
2655 (struct kvm_get_htab_header *) buffer;
2656 size_t chunksize = sizeof(*head) +
2657 HASH_PTE_SIZE_64 * head->n_valid;
2658
2659 qemu_put_be32(f, head->index);
2660 qemu_put_be16(f, head->n_valid);
2661 qemu_put_be16(f, head->n_invalid);
2662 qemu_put_buffer(f, (void *)(head + 1),
2663 HASH_PTE_SIZE_64 * head->n_valid);
2664
2665 buffer += chunksize;
2666 n -= chunksize;
2667 }
2668 }
2669 } while ((rc != 0)
2670 && ((max_ns < 0)
2671 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2672
2673 return (rc == 0) ? 1 : 0;
2674 }
2675
2676 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2677 uint16_t n_valid, uint16_t n_invalid)
2678 {
2679 struct kvm_get_htab_header *buf;
2680 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2681 ssize_t rc;
2682
2683 buf = alloca(chunksize);
2684 buf->index = index;
2685 buf->n_valid = n_valid;
2686 buf->n_invalid = n_invalid;
2687
2688 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2689
2690 rc = write(fd, buf, chunksize);
2691 if (rc < 0) {
2692 fprintf(stderr, "Error writing KVM hash table: %s\n",
2693 strerror(errno));
2694 return rc;
2695 }
2696 if (rc != chunksize) {
2697 /* We should never get a short write on a single chunk */
2698 fprintf(stderr, "Short write, restoring KVM hash table\n");
2699 return -1;
2700 }
2701 return 0;
2702 }
2703
2704 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2705 {
2706 return true;
2707 }
2708
2709 void kvm_arch_init_irq_routing(KVMState *s)
2710 {
2711 }
2712
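/*
 * Read n HPTEs starting at index ptex from KVM via the HTAB fd into
 * hptes[]; ranges the kernel reports as invalid are zero-filled.
 */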
2713 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2714 {
2715 int fd, rc;
2716 int i;
2717
2718 fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2719
2720 i = 0;
2721 while (i < n) {
2722 struct kvm_get_htab_header *hdr;
2723 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2724 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2725
2726 rc = read(fd, buf, sizeof(buf));
2727 if (rc < 0) {
2728 hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2729 }
2730
2731 hdr = (struct kvm_get_htab_header *)buf;
2732 while ((i < n) && ((char *)hdr < (buf + rc))) {
2733 int invalid = hdr->n_invalid, valid = hdr->n_valid;
2734
2735 if (hdr->index != (ptex + i)) {
2736 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2737 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2738 }
2739
2740 if (n - i < valid) {
2741 valid = n - i;
2742 }
2743 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2744 i += valid;
2745
2746 if ((n - i) < invalid) {
2747 invalid = n - i;
2748 }
2749 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2750 i += invalid;
2751
2752 hdr = (struct kvm_get_htab_header *)
2753 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2754 }
2755 }
2756
2757 close(fd);
2758 }
2759
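/*
 * Write a single HPTE (pte0/pte1, stored big-endian) at index ptex
 * through the KVM HTAB fd.
 */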
2760 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2761 {
2762 int fd, rc;
2763 struct {
2764 struct kvm_get_htab_header hdr;
2765 uint64_t pte0;
2766 uint64_t pte1;
2767 } buf;
2768
2769 fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2770
2771 buf.hdr.n_valid = 1;
2772 buf.hdr.n_invalid = 0;
2773 buf.hdr.index = ptex;
2774 buf.pte0 = cpu_to_be64(pte0);
2775 buf.pte1 = cpu_to_be64(pte1);
2776
2777 rc = write(fd, &buf, sizeof(buf));
2778 if (rc != sizeof(buf)) {
2779 hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2780 }
2781 close(fd);
2782 }
2783
2784 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2785 uint64_t address, uint32_t data, PCIDevice *dev)
2786 {
2787 return 0;
2788 }
2789
2790 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2791 int vector, PCIDevice *dev)
2792 {
2793 return 0;
2794 }
2795
2796 int kvm_arch_release_virq_post(int virq)
2797 {
2798 return 0;
2799 }
2800
2801 int kvm_arch_msi_data_to_gsi(uint32_t data)
2802 {
2803 return data & 0xffff;
2804 }
2805
2806 int kvmppc_enable_hwrng(void)
2807 {
2808 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2809 return -1;
2810 }
2811
2812 return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2813 }
2814
2815 void kvmppc_check_papr_resize_hpt(Error **errp)
2816 {
2817 if (!kvm_enabled()) {
2818 return; /* No KVM, we're good */
2819 }
2820
2821 if (cap_resize_hpt) {
2822 return; /* Kernel has explicit support, we're good */
2823 }
2824
2825 /* Otherwise fallback on looking for PR KVM */
2826 if (kvmppc_is_pr(kvm_state)) {
2827 return;
2828 }
2829
2830 error_setg(errp,
2831 "Hash page table resizing not available with this KVM version");
2832 }
2833
2834 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2835 {
2836 CPUState *cs = CPU(cpu);
2837 struct kvm_ppc_resize_hpt rhpt = {
2838 .flags = flags,
2839 .shift = shift,
2840 };
2841
2842 if (!cap_resize_hpt) {
2843 return -ENOSYS;
2844 }
2845
2846 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2847 }
2848
2849 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2850 {
2851 CPUState *cs = CPU(cpu);
2852 struct kvm_ppc_resize_hpt rhpt = {
2853 .flags = flags,
2854 .shift = shift,
2855 };
2856
2857 if (!cap_resize_hpt) {
2858 return -ENOSYS;
2859 }
2860
2861 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2862 }
2863
2864 /*
2865 * This is a helper function to detect a post-migration scenario in
2866 * which a guest, running as KVM-HV, freezes in cpu_post_load because
2867 * the guest kernel can't handle a PVR value other than the actual host
2868 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2869 *
2870 * If we don't have cap_ppc_pvr_compat and we're not running in PR
2871 * (so, we're HV), return true. The workaround itself is done in
2872 * cpu_post_load.
2873 *
2874 * The order here is important: we only fall back to checking for KVM
2875 * PR if the guest kernel can't handle the situation itself. We want
2876 * to avoid querying the running KVM type at the QEMU level as much
2877 * as possible.
2878 */
2879 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2880 {
2881 CPUState *cs = CPU(cpu);
2882
2883 if (!kvm_enabled()) {
2884 return false;
2885 }
2886
2887 if (cap_ppc_pvr_compat) {
2888 return false;
2889 }
2890
2891 return !kvmppc_is_pr(cs->kvm_state);
2892 }
2893
2894 void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online)
2895 {
2896 CPUState *cs = CPU(cpu);
2897
2898 if (kvm_enabled()) {
2899 kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online);
2900 }
2901 }