target/ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include "qemu/osdep.h"
  18 #include <dirent.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/vfs.h>
  21
  22 #include <linux/kvm.h>
  23
  24 #include "qemu-common.h"
  25 #include "qapi/error.h"
  26 #include "qemu/error-report.h"
  27 #include "cpu.h"
  28 #include "cpu-models.h"
  29 #include "qemu/timer.h"
  30 #include "sysemu/sysemu.h"
  31 #include "sysemu/hw_accel.h"
  32 #include "kvm_ppc.h"
  33 #include "sysemu/cpus.h"
  34 #include "sysemu/device_tree.h"
  35 #include "mmu-hash64.h"
  36
  37 #include "hw/sysbus.h"
  38 #include "hw/ppc/spapr.h"
  39 #include "hw/ppc/spapr_vio.h"
  40 #include "hw/ppc/spapr_cpu_core.h"
  41 #include "hw/ppc/ppc.h"
  42 #include "sysemu/watchdog.h"
  43 #include "trace.h"
  44 #include "exec/gdbstub.h"
  45 #include "exec/memattrs.h"
  46 #include "exec/ram_addr.h"
  47 #include "sysemu/hostmem.h"
  48 #include "qemu/cutils.h"
  49 #include "qemu/mmap-alloc.h"
  50 #include "elf.h"
  51 #include "sysemu/kvm_int.h"
  52
/* Uncomment to route the DPRINTF() diagnostics in this file to stderr. */
//#define DEBUG_KVM

#ifdef DEBUG_KVM
/* Debug build: forward the printf-style arguments to stderr. */
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
/* Normal build: DPRINTF() expands to an empty statement, so its
 * arguments are neither evaluated nor compiled. */
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
  62
/* Path of the "cpus" directory of the device tree exposed under /proc */
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

/* Capability table exported to the generic KVM code.  It contains only
 * the KVM_CAP_LAST_INFO entry (presumably the list terminator -- TODO
 * confirm), i.e. no capability is listed as required here. */
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};
  68
/*
 * Cached results of the KVM capability probes.  Most of these are filled
 * in by kvm_arch_init().  cap_papr is deliberately not set there (see the
 * note in kvm_arch_init()), and the cap_ppc_safe_* values are not assigned
 * by kvm_arch_init() directly (presumably they are set by
 * kvmppc_get_cpu_characteristics() -- TODO confirm).
 */
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_smt_possible;
static int cap_spapr_tce;
static int cap_spapr_tce_64;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;
static int cap_htm;             /* Hardware transactional memory support */
static int cap_mmu_radix;
static int cap_mmu_hash_v3;
static int cap_resize_hpt;
static int cap_ppc_pvr_compat;
static int cap_ppc_safe_cache;
static int cap_ppc_safe_bounds_check;
static int cap_ppc_safe_indirect_branch;

/* Value read from KVM_REG_PPC_DEBUG_INST in kvm_arch_init_vcpu() */
static uint32_t debug_inst_opcode;
  96
/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can ensure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 *
 *     The timer is created in kvm_arch_init_vcpu() with kvm_kick_cpu() as
 *     its callback.
 */
static QEMUTimer *idle_timer;
 107
 108 static void kvm_kick_cpu(void *opaque)
 109 {
 110     PowerPCCPU *cpu = opaque;
 111
 112     qemu_cpu_kick(CPU(cpu));
 113 }
 114
 115 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
 116  * should only be used for fallback tests - generally we should use
 117  * explicit capabilities for the features we want, rather than
 118  * assuming what is/isn't available depending on the KVM variant. */
 119 static bool kvmppc_is_pr(KVMState *ks)
 120 {
 121     /* Assume KVM-PR if the GET_PVINFO capability is available */
 122     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
 123 }
 124
/* Forward declarations: both functions are called from kvm_arch_init()
 * before their definitions appear in this file. */
static int kvm_ppc_register_host_cpu_type(MachineState *ms);
static void kvmppc_get_cpu_characteristics(KVMState *s);
 127
/*
 * Architecture hook for KVM initialisation: probe the PowerPC specific
 * capabilities and cache the results in the file-scope cap_* variables.
 *
 * @ms: machine state, passed on to kvm_ppc_register_host_cpu_type()
 * @s:  KVM state used for all capability probes
 *
 * Returns 0 unconditionally; a missing level-irq capability only produces
 * a warning on stderr, and the return value of
 * kvm_ppc_register_host_cpu_type() is not checked.
 */
int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
    cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
    cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
    cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
    cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
    kvmppc_get_cpu_characteristics(s);
    /*
     * Note: setting it to false because there is no such capability
     * in KVM at this moment.
     *
     * TODO: call kvm_vm_check_extension() with the right capability
     * after the kernel starts implementing it.
     */
    cap_ppc_pvr_compat = false;

    /* Not fatal: just warn that the guest may stall (see idle_timer) */
    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    /* The result of the host CPU type registration is ignored here */
    kvm_ppc_register_host_cpu_type(ms);

    return 0;
}
 170
/* Architecture hook for irqchip creation: does nothing on PowerPC and
 * always returns 0.  Both parameters are unused. */
int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
{
    return 0;
}
 175
 176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 177 {
 178     CPUPPCState *cenv = &cpu->env;
 179     CPUState *cs = CPU(cpu);
 180     struct kvm_sregs sregs;
 181     int ret;
 182
 183     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 184         /* What we're really trying to say is "if we're on BookE, we use
 185            the native PVR for now". This is the only sane way to check
 186            it though, so we potentially confuse users that they can run
 187            BookE guests on BookS. Let's hope nobody dares enough :) */
 188         return 0;
 189     } else {
 190         if (!cap_segstate) {
 191             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 192             return -ENOSYS;
 193         }
 194     }
 195
 196     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 197     if (ret) {
 198         return ret;
 199     }
 200
 201     sregs.pvr = cenv->spr[SPR_PVR];
 202     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 203 }
 204
 205 /* Set up a shared TLB array with KVM */
 206 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 207 {
 208     CPUPPCState *env = &cpu->env;
 209     CPUState *cs = CPU(cpu);
 210     struct kvm_book3e_206_tlb_params params = {};
 211     struct kvm_config_tlb cfg = {};
 212     unsigned int entries = 0;
 213     int ret, i;
 214
 215     if (!kvm_enabled() ||
 216         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 217         return 0;
 218     }
 219
 220     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 221
 222     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 223         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 224         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 225         entries += params.tlb_sizes[i];
 226     }
 227
 228     assert(entries == env->nb_tlb);
 229     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 230
 231     env->tlb_dirty = true;
 232
 233     cfg.array = (uintptr_t)env->tlb.tlbm;
 234     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 235     cfg.params = (uintptr_t)&params;
 236     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 237
 238     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 239     if (ret < 0) {
 240         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 241                 __func__, strerror(-ret));
 242         return ret;
 243     }
 244
 245     env->kvm_sw_tlb = true;
 246     return 0;
 247 }
 248
 249
 250 #if defined(TARGET_PPC64)
 251 static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
 252 {
 253     int ret;
 254
 255     assert(kvm_state != NULL);
 256
 257     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 258         error_setg(errp, "KVM doesn't expose the MMU features it supports");
 259         error_append_hint(errp, "Consider switching to a newer KVM\n");
 260         return;
 261     }
 262
 263     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 264     if (ret == 0) {
 265         return;
 266     }
 267
 268     error_setg_errno(errp, -ret,
 269                      "KVM failed to provide the MMU features it supports");
 270 }
 271
 272 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
 273 {
 274     KVMState *s = KVM_STATE(current_machine->accelerator);
 275     struct ppc_radix_page_info *radix_page_info;
 276     struct kvm_ppc_rmmu_info rmmu_info;
 277     int i;
 278
 279     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
 280         return NULL;
 281     }
 282     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
 283         return NULL;
 284     }
 285     radix_page_info = g_malloc0(sizeof(*radix_page_info));
 286     radix_page_info->count = 0;
 287     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
 288         if (rmmu_info.ap_encodings[i]) {
 289             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
 290             radix_page_info->count++;
 291         }
 292     }
 293     return radix_page_info;
 294 }
 295
 296 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
 297                                      bool radix, bool gtse,
 298                                      uint64_t proc_tbl)
 299 {
 300     CPUState *cs = CPU(cpu);
 301     int ret;
 302     uint64_t flags = 0;
 303     struct kvm_ppc_mmuv3_cfg cfg = {
 304         .process_table = proc_tbl,
 305     };
 306
 307     if (radix) {
 308         flags |= KVM_PPC_MMUV3_RADIX;
 309     }
 310     if (gtse) {
 311         flags |= KVM_PPC_MMUV3_GTSE;
 312     }
 313     cfg.flags = flags;
 314     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
 315     switch (ret) {
 316     case 0:
 317         return H_SUCCESS;
 318     case -EINVAL:
 319         return H_PARAMETER;
 320     case -ENODEV:
 321         return H_NOT_AVAILABLE;
 322     default:
 323         return H_HARDWARE;
 324     }
 325 }
 326
 327 bool kvmppc_hpt_needs_host_contiguous_pages(void)
 328 {
 329     static struct kvm_ppc_smmu_info smmu_info;
 330
 331     if (!kvm_enabled()) {
 332         return false;
 333     }
 334
 335     kvm_get_smmu_info(&smmu_info, &error_fatal);
 336     return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
 337 }
 338
 339 void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
 340 {
 341     struct kvm_ppc_smmu_info smmu_info;
 342     int iq, ik, jq, jk;
 343     Error *local_err = NULL;
 344
 345     /* For now, we only have anything to check on hash64 MMUs */
 346     if (!cpu->hash64_opts || !kvm_enabled()) {
 347         return;
 348     }
 349
 350     kvm_get_smmu_info(&smmu_info, &local_err);
 351     if (local_err) {
 352         error_propagate(errp, local_err);
 353         return;
 354     }
 355
 356     if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
 357         && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 358         error_setg(errp,
 359                    "KVM does not support 1TiB segments which guest expects");
 360         return;
 361     }
 362
 363     if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
 364         error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
 365                    smmu_info.slb_size, cpu->hash64_opts->slb_size);
 366         return;
 367     }
 368
 369     /*
 370      * Verify that every pagesize supported by the cpu model is
 371      * supported by KVM with the same encodings
 372      */
 373     for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
 374         PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
 375         struct kvm_ppc_one_seg_page_size *ksps;
 376
 377         for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
 378             if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
 379                 break;
 380             }
 381         }
 382         if (ik >= ARRAY_SIZE(smmu_info.sps)) {
 383             error_setg(errp, "KVM doesn't support for base page shift %u",
 384                        qsps->page_shift);
 385             return;
 386         }
 387
 388         ksps = &smmu_info.sps[ik];
 389         if (ksps->slb_enc != qsps->slb_enc) {
 390             error_setg(errp,
 391 "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
 392                        ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
 393             return;
 394         }
 395
 396         for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
 397             for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
 398                 if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
 399                     break;
 400                 }
 401             }
 402
 403             if (jk >= ARRAY_SIZE(ksps->enc)) {
 404                 error_setg(errp, "KVM doesn't support page shift %u/%u",
 405                            qsps->enc[jq].page_shift, qsps->page_shift);
 406                 return;
 407             }
 408             if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
 409                 error_setg(errp,
 410 "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
 411                            ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
 412                            qsps->page_shift, qsps->enc[jq].pte_enc);
 413                 return;
 414             }
 415         }
 416     }
 417
 418     if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
 419         /* Mostly what guest pagesizes we can use are related to the
 420          * host pages used to map guest RAM, which is handled in the
 421          * platform code. Cache-Inhibited largepages (64k) however are
 422          * used for I/O, so if they're mapped to the host at all it
 423          * will be a normal mapping, not a special hugepage one used
 424          * for RAM. */
 425         if (getpagesize() < 0x10000) {
 426             error_setg(errp,
 427                        "KVM can't supply 64kiB CI pages, which guest expects");
 428         }
 429     }
 430 }
#endif /* defined(TARGET_PPC64) */
 432
 433 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 434 {
 435     return POWERPC_CPU(cpu)->vcpu_id;
 436 }
 437
/* e500 supports 2 h/w breakpoints and 2 watchpoints.
 * book3s supports only 1 watchpoint, so an array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

/* Table of h/w debug points: an address plus a type for each slot */
static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/* By default no breakpoint or watchpoint is supported; the maxima are
 * raised by kvmppc_hw_debug_points_init() on BookE. */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;
 456
 457 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 458 {
 459     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 460         max_hw_breakpoint = 2;
 461         max_hw_watchpoint = 2;
 462     }
 463
 464     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 465         fprintf(stderr, "Error initializing h/w breakpoints\n");
 466         return;
 467     }
 468 }
 469
 470 int kvm_arch_init_vcpu(CPUState *cs)
 471 {
 472     PowerPCCPU *cpu = POWERPC_CPU(cs);
 473     CPUPPCState *cenv = &cpu->env;
 474     int ret;
 475
 476     /* Synchronize sregs with kvm */
 477     ret = kvm_arch_sync_sregs(cpu);
 478     if (ret) {
 479         if (ret == -EINVAL) {
 480             error_report("Register sync failed... If you're using kvm-hv.ko,"
 481                          " only \"-cpu host\" is possible");
 482         }
 483         return ret;
 484     }
 485
 486     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 487
 488     switch (cenv->mmu_model) {
 489     case POWERPC_MMU_BOOKE206:
 490         /* This target supports access to KVM's guest TLB */
 491         ret = kvm_booke206_tlb_init(cpu);
 492         break;
 493     case POWERPC_MMU_2_07:
 494         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
 495             /* KVM-HV has transactional memory on POWER8 also without the
 496              * KVM_CAP_PPC_HTM extension, so enable it here instead as
 497              * long as it's availble to userspace on the host. */
 498             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
 499                 cap_htm = true;
 500             }
 501         }
 502         break;
 503     default:
 504         break;
 505     }
 506
 507     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 508     kvmppc_hw_debug_points_init(cenv);
 509
 510     return ret;
 511 }
 512
 513 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 514 {
 515     CPUPPCState *env = &cpu->env;
 516     CPUState *cs = CPU(cpu);
 517     struct kvm_dirty_tlb dirty_tlb;
 518     unsigned char *bitmap;
 519     int ret;
 520
 521     if (!env->kvm_sw_tlb) {
 522         return;
 523     }
 524
 525     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 526     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 527
 528     dirty_tlb.bitmap = (uintptr_t)bitmap;
 529     dirty_tlb.num_dirty = env->nb_tlb;
 530
 531     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 532     if (ret) {
 533         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 534                 __func__, strerror(-ret));
 535     }
 536
 537     g_free(bitmap);
 538 }
 539
 540 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 541 {
 542     PowerPCCPU *cpu = POWERPC_CPU(cs);
 543     CPUPPCState *env = &cpu->env;
 544     union {
 545         uint32_t u32;
 546         uint64_t u64;
 547     } val;
 548     struct kvm_one_reg reg = {
 549         .id = id,
 550         .addr = (uintptr_t) &val,
 551     };
 552     int ret;
 553
 554     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 555     if (ret != 0) {
 556         trace_kvm_failed_spr_get(spr, strerror(errno));
 557     } else {
 558         switch (id & KVM_REG_SIZE_MASK) {
 559         case KVM_REG_SIZE_U32:
 560             env->spr[spr] = val.u32;
 561             break;
 562
 563         case KVM_REG_SIZE_U64:
 564             env->spr[spr] = val.u64;
 565             break;
 566
 567         default:
 568             /* Don't handle this size yet */
 569             abort();
 570         }
 571     }
 572 }
 573
 574 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 575 {
 576     PowerPCCPU *cpu = POWERPC_CPU(cs);
 577     CPUPPCState *env = &cpu->env;
 578     union {
 579         uint32_t u32;
 580         uint64_t u64;
 581     } val;
 582     struct kvm_one_reg reg = {
 583         .id = id,
 584         .addr = (uintptr_t) &val,
 585     };
 586     int ret;
 587
 588     switch (id & KVM_REG_SIZE_MASK) {
 589     case KVM_REG_SIZE_U32:
 590         val.u32 = env->spr[spr];
 591         break;
 592
 593     case KVM_REG_SIZE_U64:
 594         val.u64 = env->spr[spr];
 595         break;
 596
 597     default:
 598         /* Don't handle this size yet */
 599         abort();
 600     }
 601
 602     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 603     if (ret != 0) {
 604         trace_kvm_failed_spr_set(spr, strerror(errno));
 605     }
 606 }
 607
 608 static int kvm_put_fp(CPUState *cs)
 609 {
 610     PowerPCCPU *cpu = POWERPC_CPU(cs);
 611     CPUPPCState *env = &cpu->env;
 612     struct kvm_one_reg reg;
 613     int i;
 614     int ret;
 615
 616     if (env->insns_flags & PPC_FLOAT) {
 617         uint64_t fpscr = env->fpscr;
 618         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 619
 620         reg.id = KVM_REG_PPC_FPSCR;
 621         reg.addr = (uintptr_t)&fpscr;
 622         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 623         if (ret < 0) {
 624             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 625             return ret;
 626         }
 627
 628         for (i = 0; i < 32; i++) {
 629             uint64_t vsr[2];
 630
 631 #ifdef HOST_WORDS_BIGENDIAN
 632             vsr[0] = float64_val(env->fpr[i]);
 633             vsr[1] = env->vsr[i];
 634 #else
 635             vsr[0] = env->vsr[i];
 636             vsr[1] = float64_val(env->fpr[i]);
 637 #endif
 638             reg.addr = (uintptr_t) &vsr;
 639             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 640
 641             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 642             if (ret < 0) {
 643                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 644                         i, strerror(errno));
 645                 return ret;
 646             }
 647         }
 648     }
 649
 650     if (env->insns_flags & PPC_ALTIVEC) {
 651         reg.id = KVM_REG_PPC_VSCR;
 652         reg.addr = (uintptr_t)&env->vscr;
 653         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 654         if (ret < 0) {
 655             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 656             return ret;
 657         }
 658
 659         for (i = 0; i < 32; i++) {
 660             reg.id = KVM_REG_PPC_VR(i);
 661             reg.addr = (uintptr_t)&env->avr[i];
 662             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 663             if (ret < 0) {
 664                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 665                 return ret;
 666             }
 667         }
 668     }
 669
 670     return 0;
 671 }
 672
 673 static int kvm_get_fp(CPUState *cs)
 674 {
 675     PowerPCCPU *cpu = POWERPC_CPU(cs);
 676     CPUPPCState *env = &cpu->env;
 677     struct kvm_one_reg reg;
 678     int i;
 679     int ret;
 680
 681     if (env->insns_flags & PPC_FLOAT) {
 682         uint64_t fpscr;
 683         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 684
 685         reg.id = KVM_REG_PPC_FPSCR;
 686         reg.addr = (uintptr_t)&fpscr;
 687         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 688         if (ret < 0) {
 689             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 690             return ret;
 691         } else {
 692             env->fpscr = fpscr;
 693         }
 694
 695         for (i = 0; i < 32; i++) {
 696             uint64_t vsr[2];
 697
 698             reg.addr = (uintptr_t) &vsr;
 699             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 700
 701             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 702             if (ret < 0) {
 703                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 704                         vsx ? "VSR" : "FPR", i, strerror(errno));
 705                 return ret;
 706             } else {
 707 #ifdef HOST_WORDS_BIGENDIAN
 708                 env->fpr[i] = vsr[0];
 709                 if (vsx) {
 710                     env->vsr[i] = vsr[1];
 711                 }
 712 #else
 713                 env->fpr[i] = vsr[1];
 714                 if (vsx) {
 715                     env->vsr[i] = vsr[0];
 716                 }
 717 #endif
 718             }
 719         }
 720     }
 721
 722     if (env->insns_flags & PPC_ALTIVEC) {
 723         reg.id = KVM_REG_PPC_VSCR;
 724         reg.addr = (uintptr_t)&env->vscr;
 725         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 726         if (ret < 0) {
 727             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 728             return ret;
 729         }
 730
 731         for (i = 0; i < 32; i++) {
 732             reg.id = KVM_REG_PPC_VR(i);
 733             reg.addr = (uintptr_t)&env->avr[i];
 734             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 735             if (ret < 0) {
 736                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 737                         i, strerror(errno));
 738                 return ret;
 739             }
 740         }
 741     }
 742
 743     return 0;
 744 }
 745
 746 #if defined(TARGET_PPC64)
 747 static int kvm_get_vpa(CPUState *cs)
 748 {
 749     PowerPCCPU *cpu = POWERPC_CPU(cs);
 750     sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
 751     struct kvm_one_reg reg;
 752     int ret;
 753
 754     reg.id = KVM_REG_PPC_VPA_ADDR;
 755     reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
 756     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 757     if (ret < 0) {
 758         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 759         return ret;
 760     }
 761
 762     assert((uintptr_t)&spapr_cpu->slb_shadow_size
 763            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
 764     reg.id = KVM_REG_PPC_VPA_SLB;
 765     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
 766     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 767     if (ret < 0) {
 768         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 769                 strerror(errno));
 770         return ret;
 771     }
 772
 773     assert((uintptr_t)&spapr_cpu->dtl_size
 774            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
 775     reg.id = KVM_REG_PPC_VPA_DTL;
 776     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
 777     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 778     if (ret < 0) {
 779         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 780                 strerror(errno));
 781         return ret;
 782     }
 783
 784     return 0;
 785 }
 786
 787 static int kvm_put_vpa(CPUState *cs)
 788 {
 789     PowerPCCPU *cpu = POWERPC_CPU(cs);
 790     sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
 791     struct kvm_one_reg reg;
 792     int ret;
 793
 794     /* SLB shadow or DTL can't be registered unless a master VPA is
 795      * registered.  That means when restoring state, if a VPA *is*
 796      * registered, we need to set that up first.  If not, we need to
 797      * deregister the others before deregistering the master VPA */
 798     assert(spapr_cpu->vpa_addr
 799            || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
 800
 801     if (spapr_cpu->vpa_addr) {
 802         reg.id = KVM_REG_PPC_VPA_ADDR;
 803         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
 804         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 805         if (ret < 0) {
 806             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 807             return ret;
 808         }
 809     }
 810
 811     assert((uintptr_t)&spapr_cpu->slb_shadow_size
 812            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
 813     reg.id = KVM_REG_PPC_VPA_SLB;
 814     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
 815     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 816     if (ret < 0) {
 817         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 818         return ret;
 819     }
 820
 821     assert((uintptr_t)&spapr_cpu->dtl_size
 822            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
 823     reg.id = KVM_REG_PPC_VPA_DTL;
 824     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
 825     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 826     if (ret < 0) {
 827         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 828                 strerror(errno));
 829         return ret;
 830     }
 831
 832     if (!spapr_cpu->vpa_addr) {
 833         reg.id = KVM_REG_PPC_VPA_ADDR;
 834         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
 835         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 836         if (ret < 0) {
 837             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 838             return ret;
 839         }
 840     }
 841
 842     return 0;
 843 }
 844 #endif /* TARGET_PPC64 */
 845
 846 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 847 {
 848     CPUPPCState *env = &cpu->env;
 849     struct kvm_sregs sregs;
 850     int i;
 851
 852     sregs.pvr = env->spr[SPR_PVR];
 853
 854     if (cpu->vhyp) {
 855         PPCVirtualHypervisorClass *vhc =
 856             PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
 857         sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
 858     } else {
 859         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 860     }
 861
 862     /* Sync SLB */
 863 #ifdef TARGET_PPC64
 864     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 865         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 866         if (env->slb[i].esid & SLB_ESID_V) {
 867             sregs.u.s.ppc64.slb[i].slbe |= i;
 868         }
 869         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 870     }
 871 #endif
 872
 873     /* Sync SRs */
 874     for (i = 0; i < 16; i++) {
 875         sregs.u.s.ppc32.sr[i] = env->sr[i];
 876     }
 877
 878     /* Sync BATs */
 879     for (i = 0; i < 8; i++) {
 880         /* Beware. We have to swap upper and lower bits here */
 881         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 882             | env->DBAT[1][i];
 883         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 884             | env->IBAT[1][i];
 885     }
 886
 887     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 888 }
 889
 890 int kvm_arch_put_registers(CPUState *cs, int level)
 891 {
 892     PowerPCCPU *cpu = POWERPC_CPU(cs);
 893     CPUPPCState *env = &cpu->env;
 894     struct kvm_regs regs;
 895     int ret;
 896     int i;
 897
 898     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 899     if (ret < 0) {
 900         return ret;
 901     }
 902
 903     regs.ctr = env->ctr;
 904     regs.lr  = env->lr;
 905     regs.xer = cpu_read_xer(env);
 906     regs.msr = env->msr;
 907     regs.pc = env->nip;
 908
 909     regs.srr0 = env->spr[SPR_SRR0];
 910     regs.srr1 = env->spr[SPR_SRR1];
 911
 912     regs.sprg0 = env->spr[SPR_SPRG0];
 913     regs.sprg1 = env->spr[SPR_SPRG1];
 914     regs.sprg2 = env->spr[SPR_SPRG2];
 915     regs.sprg3 = env->spr[SPR_SPRG3];
 916     regs.sprg4 = env->spr[SPR_SPRG4];
 917     regs.sprg5 = env->spr[SPR_SPRG5];
 918     regs.sprg6 = env->spr[SPR_SPRG6];
 919     regs.sprg7 = env->spr[SPR_SPRG7];
 920
 921     regs.pid = env->spr[SPR_BOOKE_PID];
 922
 923     for (i = 0;i < 32; i++)
 924         regs.gpr[i] = env->gpr[i];
 925
 926     regs.cr = 0;
 927     for (i = 0; i < 8; i++) {
 928         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 929     }
 930
 931     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 932     if (ret < 0)
 933         return ret;
 934
 935     kvm_put_fp(cs);
 936
 937     if (env->tlb_dirty) {
 938         kvm_sw_tlb_put(cpu);
 939         env->tlb_dirty = false;
 940     }
 941
 942     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 943         ret = kvmppc_put_books_sregs(cpu);
 944         if (ret < 0) {
 945             return ret;
 946         }
 947     }
 948
 949     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 950         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 951     }
 952
 953     if (cap_one_reg) {
 954         int i;
 955
 956         /* We deliberately ignore errors here, for kernels which have
 957          * the ONE_REG calls, but don't support the specific
 958          * registers, there's a reasonable chance things will still
 959          * work, at least until we try to migrate. */
 960         for (i = 0; i < 1024; i++) {
 961             uint64_t id = env->spr_cb[i].one_reg_id;
 962
 963             if (id != 0) {
 964                 kvm_put_one_spr(cs, id, i);
 965             }
 966         }
 967
 968 #ifdef TARGET_PPC64
 969         if (msr_ts) {
 970             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
 971                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
 972             }
 973             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
 974                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
 975             }
 976             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
 977             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
 978             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
 979             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
 980             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
 981             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
 982             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
 983             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
 984             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
 985             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
 986         }
 987
 988         if (cap_papr) {
 989             if (kvm_put_vpa(cs) < 0) {
 990                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
 991             }
 992         }
 993
 994         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
 995 #endif /* TARGET_PPC64 */
 996     }
 997
 998     return ret;
 999 }
1000
1001 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1002 {
1003      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1004 }
1005
1006 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1007 {
1008     CPUPPCState *env = &cpu->env;
1009     struct kvm_sregs sregs;
1010     int ret;
1011
1012     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1013     if (ret < 0) {
1014         return ret;
1015     }
1016
1017     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1018         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1019         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1020         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1021         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1022         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1023         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1024         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1025         env->spr[SPR_DECR] = sregs.u.e.dec;
1026         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1027         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1028         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1029     }
1030
1031     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1032         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1033         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1034         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1035         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1036         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1037     }
1038
1039     if (sregs.u.e.features & KVM_SREGS_E_64) {
1040         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1041     }
1042
1043     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1044         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1045     }
1046
1047     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1048         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1049         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1050         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1051         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1052         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1053         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1054         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1055         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1056         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1057         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1058         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1059         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1060         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1061         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1062         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1063         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1064         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1065         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1066         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1067         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1068         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1069         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1070         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1071         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1072         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1073         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1074         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1075         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1076         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1077         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1078         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1079         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1080
1081         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1082             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1083             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1084             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1085             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1086             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1087             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1088         }
1089
1090         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1091             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1092             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1093         }
1094
1095         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1096             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1097             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1098             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1099             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1100         }
1101     }
1102
1103     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1104         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1105         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1106         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1107         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1108         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1109         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1110         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1111         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1112         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1113         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1114     }
1115
1116     if (sregs.u.e.features & KVM_SREGS_EXP) {
1117         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1118     }
1119
1120     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1121         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1122         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1123     }
1124
1125     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1126         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1127         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1128         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1129
1130         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1131             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1132             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1133         }
1134     }
1135
1136     return 0;
1137 }
1138
1139 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1140 {
1141     CPUPPCState *env = &cpu->env;
1142     struct kvm_sregs sregs;
1143     int ret;
1144     int i;
1145
1146     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1147     if (ret < 0) {
1148         return ret;
1149     }
1150
1151     if (!cpu->vhyp) {
1152         ppc_store_sdr1(env, sregs.u.s.sdr1);
1153     }
1154
1155     /* Sync SLB */
1156 #ifdef TARGET_PPC64
1157     /*
1158      * The packed SLB array we get from KVM_GET_SREGS only contains
1159      * information about valid entries. So we flush our internal copy
1160      * to get rid of stale ones, then put all valid SLB entries back
1161      * in.
1162      */
1163     memset(env->slb, 0, sizeof(env->slb));
1164     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1165         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1166         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1167         /*
1168          * Only restore valid entries
1169          */
1170         if (rb & SLB_ESID_V) {
1171             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1172         }
1173     }
1174 #endif
1175
1176     /* Sync SRs */
1177     for (i = 0; i < 16; i++) {
1178         env->sr[i] = sregs.u.s.ppc32.sr[i];
1179     }
1180
1181     /* Sync BATs */
1182     for (i = 0; i < 8; i++) {
1183         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1184         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1185         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1186         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1187     }
1188
1189     return 0;
1190 }
1191
1192 int kvm_arch_get_registers(CPUState *cs)
1193 {
1194     PowerPCCPU *cpu = POWERPC_CPU(cs);
1195     CPUPPCState *env = &cpu->env;
1196     struct kvm_regs regs;
1197     uint32_t cr;
1198     int i, ret;
1199
1200     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1201     if (ret < 0)
1202         return ret;
1203
1204     cr = regs.cr;
1205     for (i = 7; i >= 0; i--) {
1206         env->crf[i] = cr & 15;
1207         cr >>= 4;
1208     }
1209
1210     env->ctr = regs.ctr;
1211     env->lr = regs.lr;
1212     cpu_write_xer(env, regs.xer);
1213     env->msr = regs.msr;
1214     env->nip = regs.pc;
1215
1216     env->spr[SPR_SRR0] = regs.srr0;
1217     env->spr[SPR_SRR1] = regs.srr1;
1218
1219     env->spr[SPR_SPRG0] = regs.sprg0;
1220     env->spr[SPR_SPRG1] = regs.sprg1;
1221     env->spr[SPR_SPRG2] = regs.sprg2;
1222     env->spr[SPR_SPRG3] = regs.sprg3;
1223     env->spr[SPR_SPRG4] = regs.sprg4;
1224     env->spr[SPR_SPRG5] = regs.sprg5;
1225     env->spr[SPR_SPRG6] = regs.sprg6;
1226     env->spr[SPR_SPRG7] = regs.sprg7;
1227
1228     env->spr[SPR_BOOKE_PID] = regs.pid;
1229
1230     for (i = 0;i < 32; i++)
1231         env->gpr[i] = regs.gpr[i];
1232
1233     kvm_get_fp(cs);
1234
1235     if (cap_booke_sregs) {
1236         ret = kvmppc_get_booke_sregs(cpu);
1237         if (ret < 0) {
1238             return ret;
1239         }
1240     }
1241
1242     if (cap_segstate) {
1243         ret = kvmppc_get_books_sregs(cpu);
1244         if (ret < 0) {
1245             return ret;
1246         }
1247     }
1248
1249     if (cap_hior) {
1250         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1251     }
1252
1253     if (cap_one_reg) {
1254         int i;
1255
1256         /* We deliberately ignore errors here, for kernels which have
1257          * the ONE_REG calls, but don't support the specific
1258          * registers, there's a reasonable chance things will still
1259          * work, at least until we try to migrate. */
1260         for (i = 0; i < 1024; i++) {
1261             uint64_t id = env->spr_cb[i].one_reg_id;
1262
1263             if (id != 0) {
1264                 kvm_get_one_spr(cs, id, i);
1265             }
1266         }
1267
1268 #ifdef TARGET_PPC64
1269         if (msr_ts) {
1270             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1271                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1272             }
1273             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1274                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1275             }
1276             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1277             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1278             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1279             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1280             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1281             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1282             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1283             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1284             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1285             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1286         }
1287
1288         if (cap_papr) {
1289             if (kvm_get_vpa(cs) < 0) {
1290                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1291             }
1292         }
1293
1294         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1295 #endif
1296     }
1297
1298     return 0;
1299 }
1300
1301 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1302 {
1303     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1304
1305     if (irq != PPC_INTERRUPT_EXT) {
1306         return 0;
1307     }
1308
1309     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1310         return 0;
1311     }
1312
1313     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1314
1315     return 0;
1316 }
1317
1318 #if defined(TARGET_PPC64)
1319 #define PPC_INPUT_INT PPC970_INPUT_INT
1320 #else
1321 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1322 #endif
1323
1324 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1325 {
1326     PowerPCCPU *cpu = POWERPC_CPU(cs);
1327     CPUPPCState *env = &cpu->env;
1328     int r;
1329     unsigned irq;
1330
1331     qemu_mutex_lock_iothread();
1332
1333     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1334      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1335     if (!cap_interrupt_level &&
1336         run->ready_for_interrupt_injection &&
1337         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1338         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1339     {
1340         /* For now KVM disregards the 'irq' argument. However, in the
1341          * future KVM could cache it in-kernel to avoid a heavyweight exit
1342          * when reading the UIC.
1343          */
1344         irq = KVM_INTERRUPT_SET;
1345
1346         DPRINTF("injected interrupt %d\n", irq);
1347         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1348         if (r < 0) {
1349             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1350         }
1351
1352         /* Always wake up soon in case the interrupt was level based */
1353         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1354                        (NANOSECONDS_PER_SECOND / 50));
1355     }
1356
1357     /* We don't know if there are more interrupts pending after this. However,
1358      * the guest will return to userspace in the course of handling this one
1359      * anyways, so we will get a chance to deliver the rest. */
1360
1361     qemu_mutex_unlock_iothread();
1362 }
1363
1364 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1365 {
1366     return MEMTXATTRS_UNSPECIFIED;
1367 }
1368
1369 int kvm_arch_process_async_events(CPUState *cs)
1370 {
1371     return cs->halted;
1372 }
1373
1374 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1375 {
1376     CPUState *cs = CPU(cpu);
1377     CPUPPCState *env = &cpu->env;
1378
1379     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1380         cs->halted = 1;
1381         cs->exception_index = EXCP_HLT;
1382     }
1383
1384     return 0;
1385 }
1386
1387 /* map dcr access to existing qemu dcr emulation */
1388 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1389 {
1390     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1391         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1392
1393     return 0;
1394 }
1395
1396 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1397 {
1398     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1399         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1400
1401     return 0;
1402 }
1403
1404 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1405 {
1406     /* Mixed endian case is not handled */
1407     uint32_t sc = debug_inst_opcode;
1408
1409     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1410                             sizeof(sc), 0) ||
1411         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1412         return -EINVAL;
1413     }
1414
1415     return 0;
1416 }
1417
1418 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1419 {
1420     uint32_t sc;
1421
1422     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1423         sc != debug_inst_opcode ||
1424         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1425                             sizeof(sc), 1)) {
1426         return -EINVAL;
1427     }
1428
1429     return 0;
1430 }
1431
1432 static int find_hw_breakpoint(target_ulong addr, int type)
1433 {
1434     int n;
1435
1436     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1437            <= ARRAY_SIZE(hw_debug_points));
1438
1439     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1440         if (hw_debug_points[n].addr == addr &&
1441              hw_debug_points[n].type == type) {
1442             return n;
1443         }
1444     }
1445
1446     return -1;
1447 }
1448
1449 static int find_hw_watchpoint(target_ulong addr, int *flag)
1450 {
1451     int n;
1452
1453     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1454     if (n >= 0) {
1455         *flag = BP_MEM_ACCESS;
1456         return n;
1457     }
1458
1459     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1460     if (n >= 0) {
1461         *flag = BP_MEM_WRITE;
1462         return n;
1463     }
1464
1465     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1466     if (n >= 0) {
1467         *flag = BP_MEM_READ;
1468         return n;
1469     }
1470
1471     return -1;
1472 }
1473
1474 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1475                                   target_ulong len, int type)
1476 {
1477     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1478         return -ENOBUFS;
1479     }
1480
1481     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1482     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1483
1484     switch (type) {
1485     case GDB_BREAKPOINT_HW:
1486         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1487             return -ENOBUFS;
1488         }
1489
1490         if (find_hw_breakpoint(addr, type) >= 0) {
1491             return -EEXIST;
1492         }
1493
1494         nb_hw_breakpoint++;
1495         break;
1496
1497     case GDB_WATCHPOINT_WRITE:
1498     case GDB_WATCHPOINT_READ:
1499     case GDB_WATCHPOINT_ACCESS:
1500         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1501             return -ENOBUFS;
1502         }
1503
1504         if (find_hw_breakpoint(addr, type) >= 0) {
1505             return -EEXIST;
1506         }
1507
1508         nb_hw_watchpoint++;
1509         break;
1510
1511     default:
1512         return -ENOSYS;
1513     }
1514
1515     return 0;
1516 }
1517
1518 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1519                                   target_ulong len, int type)
1520 {
1521     int n;
1522
1523     n = find_hw_breakpoint(addr, type);
1524     if (n < 0) {
1525         return -ENOENT;
1526     }
1527
1528     switch (type) {
1529     case GDB_BREAKPOINT_HW:
1530         nb_hw_breakpoint--;
1531         break;
1532
1533     case GDB_WATCHPOINT_WRITE:
1534     case GDB_WATCHPOINT_READ:
1535     case GDB_WATCHPOINT_ACCESS:
1536         nb_hw_watchpoint--;
1537         break;
1538
1539     default:
1540         return -ENOSYS;
1541     }
1542     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1543
1544     return 0;
1545 }
1546
1547 void kvm_arch_remove_all_hw_breakpoints(void)
1548 {
1549     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1550 }
1551
1552 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1553 {
1554     int n;
1555
1556     /* Software Breakpoint updates */
1557     if (kvm_sw_breakpoints_active(cs)) {
1558         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1559     }
1560
1561     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1562            <= ARRAY_SIZE(hw_debug_points));
1563     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1564
1565     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1566         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1567         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1568         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1569             switch (hw_debug_points[n].type) {
1570             case GDB_BREAKPOINT_HW:
1571                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1572                 break;
1573             case GDB_WATCHPOINT_WRITE:
1574                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1575                 break;
1576             case GDB_WATCHPOINT_READ:
1577                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1578                 break;
1579             case GDB_WATCHPOINT_ACCESS:
1580                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1581                                         KVMPPC_DEBUG_WATCH_READ;
1582                 break;
1583             default:
1584                 cpu_abort(cs, "Unsupported breakpoint type\n");
1585             }
1586             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1587         }
1588     }
1589 }
1590
1591 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1592 {
1593     CPUState *cs = CPU(cpu);
1594     CPUPPCState *env = &cpu->env;
1595     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1596     int handle = 0;
1597     int n;
1598     int flag = 0;
1599
1600     if (cs->singlestep_enabled) {
1601         handle = 1;
1602     } else if (arch_info->status) {
1603         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1604             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1605                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1606                 if (n >= 0) {
1607                     handle = 1;
1608                 }
1609             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1610                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1611                 n = find_hw_watchpoint(arch_info->address,  &flag);
1612                 if (n >= 0) {
1613                     handle = 1;
1614                     cs->watchpoint_hit = &hw_watchpoint;
1615                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1616                     hw_watchpoint.flags = flag;
1617                 }
1618             }
1619         }
1620     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1621         handle = 1;
1622     } else {
1623         /* QEMU is not able to handle debug exception, so inject
1624          * program exception to guest;
1625          * Yes program exception NOT debug exception !!
1626          * When QEMU is using debug resources then debug exception must
1627          * be always set. To achieve this we set MSR_DE and also set
1628          * MSRP_DEP so guest cannot change MSR_DE.
1629          * When emulating debug resource for guest we want guest
1630          * to control MSR_DE (enable/disable debug interrupt on need).
1631          * Supporting both configurations are NOT possible.
1632          * So the result is that we cannot share debug resources
1633          * between QEMU and Guest on BOOKE architecture.
1634          * In the current design QEMU gets the priority over guest,
1635          * this means that if QEMU is using debug resources then guest
1636          * cannot use them;
1637          * For software breakpoint QEMU uses a privileged instruction;
1638          * So there cannot be any reason that we are here for guest
1639          * set debug exception, only possibility is guest executed a
1640          * privileged / illegal instruction and that's why we are
1641          * injecting a program interrupt.
1642          */
1643
1644         cpu_synchronize_state(cs);
1645         /* env->nip is PC, so increment this by 4 to use
1646          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1647          */
1648         env->nip += 4;
1649         cs->exception_index = POWERPC_EXCP_PROGRAM;
1650         env->error_code = POWERPC_EXCP_INVAL;
1651         ppc_cpu_do_interrupt(cs);
1652     }
1653
1654     return handle;
1655 }
1656
1657 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1658 {
1659     PowerPCCPU *cpu = POWERPC_CPU(cs);
1660     CPUPPCState *env = &cpu->env;
1661     int ret;
1662
1663     qemu_mutex_lock_iothread();
1664
1665     switch (run->exit_reason) {
1666     case KVM_EXIT_DCR:
1667         if (run->dcr.is_write) {
1668             DPRINTF("handle dcr write\n");
1669             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1670         } else {
1671             DPRINTF("handle dcr read\n");
1672             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1673         }
1674         break;
1675     case KVM_EXIT_HLT:
1676         DPRINTF("handle halt\n");
1677         ret = kvmppc_handle_halt(cpu);
1678         break;
1679 #if defined(TARGET_PPC64)
1680     case KVM_EXIT_PAPR_HCALL:
1681         DPRINTF("handle PAPR hypercall\n");
1682         run->papr_hcall.ret = spapr_hypercall(cpu,
1683                                               run->papr_hcall.nr,
1684                                               run->papr_hcall.args);
1685         ret = 0;
1686         break;
1687 #endif
1688     case KVM_EXIT_EPR:
1689         DPRINTF("handle epr\n");
1690         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1691         ret = 0;
1692         break;
1693     case KVM_EXIT_WATCHDOG:
1694         DPRINTF("handle watchdog expiry\n");
1695         watchdog_perform_action();
1696         ret = 0;
1697         break;
1698
1699     case KVM_EXIT_DEBUG:
1700         DPRINTF("handle debug exception\n");
1701         if (kvm_handle_debug(cpu, run)) {
1702             ret = EXCP_DEBUG;
1703             break;
1704         }
1705         /* re-enter, this exception was guest-internal */
1706         ret = 0;
1707         break;
1708
1709     default:
1710         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1711         ret = -1;
1712         break;
1713     }
1714
1715     qemu_mutex_unlock_iothread();
1716     return ret;
1717 }
1718
1719 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1720 {
1721     CPUState *cs = CPU(cpu);
1722     uint32_t bits = tsr_bits;
1723     struct kvm_one_reg reg = {
1724         .id = KVM_REG_PPC_OR_TSR,
1725         .addr = (uintptr_t) &bits,
1726     };
1727
1728     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1729 }
1730
1731 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1732 {
1733
1734     CPUState *cs = CPU(cpu);
1735     uint32_t bits = tsr_bits;
1736     struct kvm_one_reg reg = {
1737         .id = KVM_REG_PPC_CLEAR_TSR,
1738         .addr = (uintptr_t) &bits,
1739     };
1740
1741     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1742 }
1743
1744 int kvmppc_set_tcr(PowerPCCPU *cpu)
1745 {
1746     CPUState *cs = CPU(cpu);
1747     CPUPPCState *env = &cpu->env;
1748     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1749
1750     struct kvm_one_reg reg = {
1751         .id = KVM_REG_PPC_TCR,
1752         .addr = (uintptr_t) &tcr,
1753     };
1754
1755     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1756 }
1757
1758 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1759 {
1760     CPUState *cs = CPU(cpu);
1761     int ret;
1762
1763     if (!kvm_enabled()) {
1764         return -1;
1765     }
1766
1767     if (!cap_ppc_watchdog) {
1768         printf("warning: KVM does not support watchdog");
1769         return -1;
1770     }
1771
1772     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1773     if (ret < 0) {
1774         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1775                 __func__, strerror(-ret));
1776         return ret;
1777     }
1778
1779     return ret;
1780 }
1781
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy
 * it (bounded by @len) into @value.
 *
 * Returns 0 when such a line was found, -1 when the file cannot be
 * opened or no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int ret = -1;
    FILE *f = fopen("/proc/cpuinfo", "r");

    if (!f) {
        return -1;
    }

    while (fgets(line, sizeof(line), f)) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
        /* Stop scanning once a read yields an empty string */
        if (line[0] == '\0') {
            break;
        }
    }

    fclose(f);

    return ret;
}
1809
1810 uint32_t kvmppc_get_tbfreq(void)
1811 {
1812     char line[512];
1813     char *ns;
1814     uint32_t retval = NANOSECONDS_PER_SECOND;
1815
1816     if (read_cpuinfo("timebase", line, sizeof(line))) {
1817         return retval;
1818     }
1819
1820     if (!(ns = strchr(line, ':'))) {
1821         return retval;
1822     }
1823
1824     ns++;
1825
1826     return atoi(ns);
1827 }
1828
/*
 * Load the contents of /proc/device-tree/system-id into a newly
 * allocated buffer stored in *value.  Returns whether the read worked.
 */
bool kvmppc_get_host_serial(char **value)
{
    static const char path[] = "/proc/device-tree/system-id";

    return g_file_get_contents(path, value, NULL, NULL);
}
1834
/*
 * Load the contents of /proc/device-tree/model into a newly allocated
 * buffer stored in *value.  Returns whether the read worked.
 */
bool kvmppc_get_host_model(char **value)
{
    static const char path[] = "/proc/device-tree/model";

    return g_file_get_contents(path, value, NULL, NULL);
}
1839
1840 /* Try to find a device tree node for a CPU with clock-frequency property */
1841 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1842 {
1843     struct dirent *dirp;
1844     DIR *dp;
1845
1846     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1847         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1848         return -1;
1849     }
1850
1851     buf[0] = '\0';
1852     while ((dirp = readdir(dp)) != NULL) {
1853         FILE *f;
1854         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1855                  dirp->d_name);
1856         f = fopen(buf, "r");
1857         if (f) {
1858             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1859             fclose(f);
1860             break;
1861         }
1862         buf[0] = '\0';
1863     }
1864     closedir(dp);
1865     if (buf[0] == '\0') {
1866         printf("Unknown host!\n");
1867         return -1;
1868     }
1869
1870     return 0;
1871 }
1872
/*
 * Read a big-endian integer stored in the file @filename.
 *
 * Up to 8 bytes are read.  Exactly 4 bytes are decoded as a 32-bit
 * quantity and exactly 8 bytes as a 64-bit one.
 *
 * Returns the decoded value, (uint64_t)-1 if the file cannot be opened,
 * or 0 if any other number of bytes was read.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    int nread;
    FILE *prop = fopen(filename, "rb");

    if (prop == NULL) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), prop);
    fclose(prop);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1899
1900 /* Read a CPU node property from the host device tree that's a single
1901  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1902  * (can't find or open the property, or doesn't understand the
1903  * format) */
1904 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1905 {
1906     char buf[PATH_MAX], *tmp;
1907     uint64_t val;
1908
1909     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1910         return -1;
1911     }
1912
1913     tmp = g_strdup_printf("%s/%s", buf, propname);
1914     val = kvmppc_read_int_dt(tmp);
1915     g_free(tmp);
1916
1917     return val;
1918 }
1919
/*
 * Return the "clock-frequency" property of the host CPU device tree node,
 * as read by kvmppc_read_int_cpu_dt().
 */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
1924
1925 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1926  {
1927      PowerPCCPU *cpu = ppc_env_get_cpu(env);
1928      CPUState *cs = CPU(cpu);
1929
1930     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1931         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1932         return 0;
1933     }
1934
1935     return 1;
1936 }
1937
1938 int kvmppc_get_hasidle(CPUPPCState *env)
1939 {
1940     struct kvm_ppc_pvinfo pvinfo;
1941
1942     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1943         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1944         return 1;
1945     }
1946
1947     return 0;
1948 }
1949
1950 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1951 {
1952     uint32_t *hc = (uint32_t*)buf;
1953     struct kvm_ppc_pvinfo pvinfo;
1954
1955     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1956         memcpy(buf, pvinfo.hcall, buf_len);
1957         return 0;
1958     }
1959
1960     /*
1961      * Fallback to always fail hypercalls regardless of endianness:
1962      *
1963      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1964      *     li r3, -1
1965      *     b .+8       (becomes nop in wrong endian)
1966      *     bswap32(li r3, -1)
1967      */
1968
1969     hc[0] = cpu_to_be32(0x08000048);
1970     hc[1] = cpu_to_be32(0x3860ffff);
1971     hc[2] = cpu_to_be32(0x48000008);
1972     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1973
1974     return 1;
1975 }
1976
1977 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1978 {
1979     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1980 }
1981
1982 void kvmppc_enable_logical_ci_hcalls(void)
1983 {
1984     /*
1985      * FIXME: it would be nice if we could detect the cases where
1986      * we're using a device which requires the in kernel
1987      * implementation of these hcalls, but the kernel lacks them and
1988      * produce a warning.
1989      */
1990     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1991     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1992 }
1993
1994 void kvmppc_enable_set_mode_hcall(void)
1995 {
1996     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
1997 }
1998
1999 void kvmppc_enable_clear_ref_mod_hcalls(void)
2000 {
2001     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2002     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2003 }
2004
2005 void kvmppc_set_papr(PowerPCCPU *cpu)
2006 {
2007     CPUState *cs = CPU(cpu);
2008     int ret;
2009
2010     if (!kvm_enabled()) {
2011         return;
2012     }
2013
2014     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2015     if (ret) {
2016         error_report("This vCPU type or KVM version does not support PAPR");
2017         exit(1);
2018     }
2019
2020     /* Update the capability flag so we sync the right information
2021      * with kvm */
2022     cap_papr = 1;
2023 }
2024
2025 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2026 {
2027     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2028 }
2029
2030 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2031 {
2032     CPUState *cs = CPU(cpu);
2033     int ret;
2034
2035     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2036     if (ret && mpic_proxy) {
2037         error_report("This KVM version does not support EPR");
2038         exit(1);
2039     }
2040 }
2041
2042 int kvmppc_smt_threads(void)
2043 {
2044     return cap_ppc_smt ? cap_ppc_smt : 1;
2045 }
2046
2047 int kvmppc_set_smt_threads(int smt)
2048 {
2049     int ret;
2050
2051     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2052     if (!ret) {
2053         cap_ppc_smt = smt;
2054     }
2055     return ret;
2056 }
2057
2058 void kvmppc_hint_smt_possible(Error **errp)
2059 {
2060     int i;
2061     GString *g;
2062     char *s;
2063
2064     assert(kvm_enabled());
2065     if (cap_ppc_smt_possible) {
2066         g = g_string_new("Available VSMT modes:");
2067         for (i = 63; i >= 0; i--) {
2068             if ((1UL << i) & cap_ppc_smt_possible) {
2069                 g_string_append_printf(g, " %lu", (1UL << i));
2070             }
2071         }
2072         s = g_string_free(g, false);
2073         error_append_hint(errp, "%s.\n", s);
2074         g_free(s);
2075     } else {
2076         error_append_hint(errp,
2077                           "This KVM seems to be too old to support VSMT.\n");
2078     }
2079 }
2080
2081
2082 #ifdef TARGET_PPC64
2083 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2084 {
2085     struct kvm_ppc_smmu_info info;
2086     long rampagesize, best_page_shift;
2087     int i;
2088
2089     /* Find the largest hardware supported page size that's less than
2090      * or equal to the (logical) backing page size of guest RAM */
2091     kvm_get_smmu_info(&info, &error_fatal);
2092     rampagesize = qemu_getrampagesize();
2093     best_page_shift = 0;
2094
2095     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2096         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2097
2098         if (!sps->page_shift) {
2099             continue;
2100         }
2101
2102         if ((sps->page_shift > best_page_shift)
2103             && ((1UL << sps->page_shift) <= rampagesize)) {
2104             best_page_shift = sps->page_shift;
2105         }
2106     }
2107
2108     return MIN(current_size,
2109                1ULL << (best_page_shift + hash_shift - 7));
2110 }
2111 #endif
2112
2113 bool kvmppc_spapr_use_multitce(void)
2114 {
2115     return cap_spapr_multitce;
2116 }
2117
2118 int kvmppc_spapr_enable_inkernel_multitce(void)
2119 {
2120     int ret;
2121
2122     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2123                             H_PUT_TCE_INDIRECT, 1);
2124     if (!ret) {
2125         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2126                                 H_STUFF_TCE, 1);
2127     }
2128
2129     return ret;
2130 }
2131
2132 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2133                               uint64_t bus_offset, uint32_t nb_table,
2134                               int *pfd, bool need_vfio)
2135 {
2136     long len;
2137     int fd;
2138     void *table;
2139
2140     /* Must set fd to -1 so we don't try to munmap when called for
2141      * destroying the table, which the upper layers -will- do
2142      */
2143     *pfd = -1;
2144     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2145         return NULL;
2146     }
2147
2148     if (cap_spapr_tce_64) {
2149         struct kvm_create_spapr_tce_64 args = {
2150             .liobn = liobn,
2151             .page_shift = page_shift,
2152             .offset = bus_offset >> page_shift,
2153             .size = nb_table,
2154             .flags = 0
2155         };
2156         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2157         if (fd < 0) {
2158             fprintf(stderr,
2159                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2160                     liobn);
2161             return NULL;
2162         }
2163     } else if (cap_spapr_tce) {
2164         uint64_t window_size = (uint64_t) nb_table << page_shift;
2165         struct kvm_create_spapr_tce args = {
2166             .liobn = liobn,
2167             .window_size = window_size,
2168         };
2169         if ((window_size != args.window_size) || bus_offset) {
2170             return NULL;
2171         }
2172         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2173         if (fd < 0) {
2174             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2175                     liobn);
2176             return NULL;
2177         }
2178     } else {
2179         return NULL;
2180     }
2181
2182     len = nb_table * sizeof(uint64_t);
2183     /* FIXME: round this up to page size */
2184
2185     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2186     if (table == MAP_FAILED) {
2187         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2188                 liobn);
2189         close(fd);
2190         return NULL;
2191     }
2192
2193     *pfd = fd;
2194     return table;
2195 }
2196
/*
 * Unmap a TCE table of @nb_table 64-bit entries and close its @fd.
 *
 * Returns -1 when @fd is negative, 0 otherwise.  A failing munmap() or
 * close() is only reported on stderr; close() is not attempted when
 * munmap() failed.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;
    bool failed;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);

    failed = munmap(table, len) < 0;
    if (!failed) {
        failed = close(fd) < 0;
    }

    if (failed) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2215
2216 int kvmppc_reset_htab(int shift_hint)
2217 {
2218     uint32_t shift = shift_hint;
2219
2220     if (!kvm_enabled()) {
2221         /* Full emulation, tell caller to allocate htab itself */
2222         return 0;
2223     }
2224     if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2225         int ret;
2226         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2227         if (ret == -ENOTTY) {
2228             /* At least some versions of PR KVM advertise the
2229              * capability, but don't implement the ioctl().  Oops.
2230              * Return 0 so that we allocate the htab in qemu, as is
2231              * correct for PR. */
2232             return 0;
2233         } else if (ret < 0) {
2234             return ret;
2235         }
2236         return shift;
2237     }
2238
2239     /* We have a kernel that predates the htab reset calls.  For PR
2240      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2241      * this era, it has allocated a 16MB fixed size hash table already. */
2242     if (kvmppc_is_pr(kvm_state)) {
2243         /* PR - tell caller to allocate htab */
2244         return 0;
2245     } else {
2246         /* HV - assume 16MB kernel allocated htab */
2247         return 24;
2248     }
2249 }
2250
2251 static inline uint32_t mfpvr(void)
2252 {
2253     uint32_t pvr;
2254
2255     asm ("mfpvr %0"
2256          : "=r"(pvr));
2257     return pvr;
2258 }
2259
/*
 * Set (@on true) or clear (@on false) the bits of @flags in *@word.
 */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
2268
2269 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2270 {
2271     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2272     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2273     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2274
2275     /* Now fix up the class with information we can query from the host */
2276     pcc->pvr = mfpvr();
2277
2278     alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2279                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2280     alter_insns(&pcc->insns_flags2, PPC2_VSX,
2281                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2282     alter_insns(&pcc->insns_flags2, PPC2_DFP,
2283                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2284
2285     if (dcache_size != -1) {
2286         pcc->l1_dcache_size = dcache_size;
2287     }
2288
2289     if (icache_size != -1) {
2290         pcc->l1_icache_size = icache_size;
2291     }
2292
2293 #if defined(TARGET_PPC64)
2294     pcc->radix_page_info = kvm_get_radix_page_info();
2295
2296     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2297         /*
2298          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2299          * compliant.  More importantly, advertising ISA 3.00
2300          * architected mode may prevent guests from activating
2301          * necessary DD1 workarounds.
2302          */
2303         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2304                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2305     }
2306 #endif /* defined(TARGET_PPC64) */
2307 }
2308
2309 bool kvmppc_has_cap_epr(void)
2310 {
2311     return cap_epr;
2312 }
2313
2314 bool kvmppc_has_cap_fixup_hcalls(void)
2315 {
2316     return cap_fixup_hcalls;
2317 }
2318
2319 bool kvmppc_has_cap_htm(void)
2320 {
2321     return cap_htm;
2322 }
2323
2324 bool kvmppc_has_cap_mmu_radix(void)
2325 {
2326     return cap_mmu_radix;
2327 }
2328
2329 bool kvmppc_has_cap_mmu_hash_v3(void)
2330 {
2331     return cap_mmu_hash_v3;
2332 }
2333
/*
 * Returns true when the host PVR, masked with
 * CPU_POWERPC_POWER_SERVER_MASK, equals one of the POWER8E, POWER8NVL or
 * POWER8 base values.  Always false when TARGET_PPC64 is not defined.
 */
static bool kvmppc_power8_host(void)
{
#ifdef TARGET_PPC64
    const uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();

    if (base_pvr == CPU_POWERPC_POWER8E_BASE) {
        return true;
    }
    if (base_pvr == CPU_POWERPC_POWER8NVL_BASE) {
        return true;
    }
    return base_pvr == CPU_POWERPC_POWER8_BASE;
#else
    return false;
#endif /* TARGET_PPC64 */
}
2347
2348 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2349 {
2350     bool l1d_thread_priv_req = !kvmppc_power8_host();
2351
2352     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2353         return 2;
2354     } else if ((!l1d_thread_priv_req ||
2355                 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2356                (c.character & c.character_mask
2357                 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2358         return 1;
2359     }
2360
2361     return 0;
2362 }
2363
2364 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2365 {
2366     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2367         return 2;
2368     } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2369         return 1;
2370     }
2371
2372     return 0;
2373 }
2374
2375 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2376 {
2377     if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2378         return  SPAPR_CAP_FIXED_CCD;
2379     } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2380         return SPAPR_CAP_FIXED_IBS;
2381     }
2382
2383     return 0;
2384 }
2385
2386 static void kvmppc_get_cpu_characteristics(KVMState *s)
2387 {
2388     struct kvm_ppc_cpu_char c;
2389     int ret;
2390
2391     /* Assume broken */
2392     cap_ppc_safe_cache = 0;
2393     cap_ppc_safe_bounds_check = 0;
2394     cap_ppc_safe_indirect_branch = 0;
2395
2396     ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2397     if (!ret) {
2398         return;
2399     }
2400     ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2401     if (ret < 0) {
2402         return;
2403     }
2404
2405     cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2406     cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2407     cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2408 }
2409
2410 int kvmppc_get_cap_safe_cache(void)
2411 {
2412     return cap_ppc_safe_cache;
2413 }
2414
2415 int kvmppc_get_cap_safe_bounds_check(void)
2416 {
2417     return cap_ppc_safe_bounds_check;
2418 }
2419
2420 int kvmppc_get_cap_safe_indirect_branch(void)
2421 {
2422     return cap_ppc_safe_indirect_branch;
2423 }
2424
2425 bool kvmppc_has_cap_spapr_vfio(void)
2426 {
2427     return cap_spapr_vfio;
2428 }
2429
2430 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2431 {
2432     uint32_t host_pvr = mfpvr();
2433     PowerPCCPUClass *pvr_pcc;
2434
2435     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2436     if (pvr_pcc == NULL) {
2437         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2438     }
2439
2440     return pvr_pcc;
2441 }
2442
2443 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2444 {
2445     TypeInfo type_info = {
2446         .name = TYPE_HOST_POWERPC_CPU,
2447         .class_init = kvmppc_host_cpu_class_init,
2448     };
2449     MachineClass *mc = MACHINE_GET_CLASS(ms);
2450     PowerPCCPUClass *pvr_pcc;
2451     ObjectClass *oc;
2452     DeviceClass *dc;
2453     int i;
2454
2455     pvr_pcc = kvm_ppc_get_host_cpu_class();
2456     if (pvr_pcc == NULL) {
2457         return -1;
2458     }
2459     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2460     type_register(&type_info);
2461     if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2462         /* override TCG default cpu type with 'host' cpu model */
2463         mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2464     }
2465
2466     oc = object_class_by_name(type_info.name);
2467     g_assert(oc);
2468
2469     /*
2470      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2471      * we want "POWER8" to be a "family" alias that points to the current
2472      * host CPU type, too)
2473      */
2474     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2475     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2476         if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2477             char *suffix;
2478
2479             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2480             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2481             if (suffix) {
2482                 *suffix = 0;
2483             }
2484             break;
2485         }
2486     }
2487
2488     return 0;
2489 }
2490
2491 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2492 {
2493     struct kvm_rtas_token_args args = {
2494         .token = token,
2495     };
2496
2497     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2498         return -ENOENT;
2499     }
2500
2501     strncpy(args.name, function, sizeof(args.name));
2502
2503     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2504 }
2505
2506 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2507 {
2508     struct kvm_get_htab_fd s = {
2509         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2510         .start_index = index,
2511     };
2512     int ret;
2513
2514     if (!cap_htab_fd) {
2515         error_setg(errp, "KVM version doesn't support %s the HPT",
2516                    write ? "writing" : "reading");
2517         return -ENOTSUP;
2518     }
2519
2520     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2521     if (ret < 0) {
2522         error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2523                    write ? "writing" : "reading", write ? "to" : "from",
2524                    strerror(errno));
2525         return -errno;
2526     }
2527
2528     return ret;
2529 }
2530
2531 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2532 {
2533     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2534     uint8_t buf[bufsize];
2535     ssize_t rc;
2536
2537     do {
2538         rc = read(fd, buf, bufsize);
2539         if (rc < 0) {
2540             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2541                     strerror(errno));
2542             return rc;
2543         } else if (rc) {
2544             uint8_t *buffer = buf;
2545             ssize_t n = rc;
2546             while (n) {
2547                 struct kvm_get_htab_header *head =
2548                     (struct kvm_get_htab_header *) buffer;
2549                 size_t chunksize = sizeof(*head) +
2550                      HASH_PTE_SIZE_64 * head->n_valid;
2551
2552                 qemu_put_be32(f, head->index);
2553                 qemu_put_be16(f, head->n_valid);
2554                 qemu_put_be16(f, head->n_invalid);
2555                 qemu_put_buffer(f, (void *)(head + 1),
2556                                 HASH_PTE_SIZE_64 * head->n_valid);
2557
2558                 buffer += chunksize;
2559                 n -= chunksize;
2560             }
2561         }
2562     } while ((rc != 0)
2563              && ((max_ns < 0)
2564                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2565
2566     return (rc == 0) ? 1 : 0;
2567 }
2568
2569 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2570                            uint16_t n_valid, uint16_t n_invalid)
2571 {
2572     struct kvm_get_htab_header *buf;
2573     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2574     ssize_t rc;
2575
2576     buf = alloca(chunksize);
2577     buf->index = index;
2578     buf->n_valid = n_valid;
2579     buf->n_invalid = n_invalid;
2580
2581     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2582
2583     rc = write(fd, buf, chunksize);
2584     if (rc < 0) {
2585         fprintf(stderr, "Error writing KVM hash table: %s\n",
2586                 strerror(errno));
2587         return rc;
2588     }
2589     if (rc != chunksize) {
2590         /* We should never get a short write on a single chunk */
2591         fprintf(stderr, "Short write, restoring KVM hash table\n");
2592         return -1;
2593     }
2594     return 0;
2595 }
2596
2597 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2598 {
2599     return true;
2600 }
2601
2602 void kvm_arch_init_irq_routing(KVMState *s)
2603 {
2604 }
2605
2606 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2607 {
2608     int fd, rc;
2609     int i;
2610
2611     fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2612
2613     i = 0;
2614     while (i < n) {
2615         struct kvm_get_htab_header *hdr;
2616         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2617         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2618
2619         rc = read(fd, buf, sizeof(buf));
2620         if (rc < 0) {
2621             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2622         }
2623
2624         hdr = (struct kvm_get_htab_header *)buf;
2625         while ((i < n) && ((char *)hdr < (buf + rc))) {
2626             int invalid = hdr->n_invalid, valid = hdr->n_valid;
2627
2628             if (hdr->index != (ptex + i)) {
2629                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2630                          " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2631             }
2632
2633             if (n - i < valid) {
2634                 valid = n - i;
2635             }
2636             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2637             i += valid;
2638
2639             if ((n - i) < invalid) {
2640                 invalid = n - i;
2641             }
2642             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2643             i += invalid;
2644
2645             hdr = (struct kvm_get_htab_header *)
2646                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2647         }
2648     }
2649
2650     close(fd);
2651 }
2652
2653 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2654 {
2655     int fd, rc;
2656     struct {
2657         struct kvm_get_htab_header hdr;
2658         uint64_t pte0;
2659         uint64_t pte1;
2660     } buf;
2661
2662     fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2663
2664     buf.hdr.n_valid = 1;
2665     buf.hdr.n_invalid = 0;
2666     buf.hdr.index = ptex;
2667     buf.pte0 = cpu_to_be64(pte0);
2668     buf.pte1 = cpu_to_be64(pte1);
2669
2670     rc = write(fd, &buf, sizeof(buf));
2671     if (rc != sizeof(buf)) {
2672         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2673     }
2674     close(fd);
2675 }
2676
2677 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2678                              uint64_t address, uint32_t data, PCIDevice *dev)
2679 {
2680     return 0;
2681 }
2682
2683 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2684                                 int vector, PCIDevice *dev)
2685 {
2686     return 0;
2687 }
2688
/* This implementation ignores @virq and always returns 0. */
int kvm_arch_release_virq_post(int virq)
{
    (void)virq;

    return 0;
}
2693
/* Return the low 16 bits of @data. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t gsi_mask = 0xffff;

    return data & gsi_mask;
}
2698
2699 int kvmppc_enable_hwrng(void)
2700 {
2701     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2702         return -1;
2703     }
2704
2705     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2706 }
2707
2708 void kvmppc_check_papr_resize_hpt(Error **errp)
2709 {
2710     if (!kvm_enabled()) {
2711         return; /* No KVM, we're good */
2712     }
2713
2714     if (cap_resize_hpt) {
2715         return; /* Kernel has explicit support, we're good */
2716     }
2717
2718     /* Otherwise fallback on looking for PR KVM */
2719     if (kvmppc_is_pr(kvm_state)) {
2720         return;
2721     }
2722
2723     error_setg(errp,
2724                "Hash page table resizing not available with this KVM version");
2725 }
2726
2727 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2728 {
2729     CPUState *cs = CPU(cpu);
2730     struct kvm_ppc_resize_hpt rhpt = {
2731         .flags = flags,
2732         .shift = shift,
2733     };
2734
2735     if (!cap_resize_hpt) {
2736         return -ENOSYS;
2737     }
2738
2739     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2740 }
2741
2742 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2743 {
2744     CPUState *cs = CPU(cpu);
2745     struct kvm_ppc_resize_hpt rhpt = {
2746         .flags = flags,
2747         .shift = shift,
2748     };
2749
2750     if (!cap_resize_hpt) {
2751         return -ENOSYS;
2752     }
2753
2754     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2755 }
2756
2757 /*
2758  * This is a helper function to detect a post migration scenario
2759  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2760  * the guest kernel can't handle a PVR value other than the actual host
2761  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2762  *
2763  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2764  * (so, we're HV), return true. The workaround itself is done in
2765  * cpu_post_load.
2766  *
2767  * The order here is important: we'll only check for KVM PR as a
2768  * fallback if the guest kernel can't handle the situation itself.
2769  * We need to avoid as much as possible querying the running KVM type
2770  * in QEMU level.
2771  */
2772 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2773 {
2774     CPUState *cs = CPU(cpu);
2775
2776     if (!kvm_enabled()) {
2777         return false;
2778     }
2779
2780     if (cap_ppc_pvr_compat) {
2781         return false;
2782     }
2783
2784     return !kvmppc_is_pr(cs->kvm_state);
2785 }
2786
2787 void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online)
2788 {
2789     CPUState *cs = CPU(cpu);
2790
2791     if (kvm_enabled()) {
2792         kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online);
2793     }
2794 }