1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
22
23 #include <linux/kvm.h>
24
25 #include "qemu-common.h"
26 #include "qemu-timer.h"
27 #include "sysemu.h"
28 #include "kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "cpus.h"
32 #include "device_tree.h"
33 #include "hw/sysbus.h"
34 #include "hw/spapr.h"
35
38 #include "hw/spapr_vio.h"
39
40 //#define DEBUG_KVM
41
42 #ifdef DEBUG_KVM
43 #define dprintf(fmt, ...) \
44 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
45 #else
46 #define dprintf(fmt, ...) \
47 do { } while (0)
48 #endif
49
50 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
51
52 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
53 KVM_CAP_LAST_INFO
54 };
55
56 static int cap_interrupt_unset = false;
57 static int cap_interrupt_level = false;
58 static int cap_segstate;
59 static int cap_booke_sregs;
60 static int cap_ppc_smt;
61 static int cap_ppc_rma;
62 static int cap_spapr_tce;
63 static int cap_hior;
64
65 /* XXX We have a race condition where we actually have a level triggered
66 * interrupt, but the infrastructure can't expose that yet, so the guest
67 * takes but ignores it, goes to sleep and never gets notified that there's
68 * still an interrupt pending.
69 *
70 * As a quick workaround, let's just wake up again 20 ms after we injected
71 * an interrupt. That way we can ensure that we're always reinjecting
72 * interrupts in case the guest swallowed them.
73 */
74 static QEMUTimer *idle_timer;
75
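/* Timer callback: kick the vcpu out of KVM so it re-checks pending
 * interrupts (see the level-interrupt workaround above). */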
76 static void kvm_kick_cpu(void *opaque)
77 {
78 PowerPCCPU *cpu = opaque;
79
80 qemu_cpu_kick(CPU(cpu));
81 }
82
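/* Probe the KVM capabilities this file relies on. Missing level-irq
 * support is survivable but may make the guest stall at times, hence
 * the warning. */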
83 int kvm_arch_init(KVMState *s)
84 {
85 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
86 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
87 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
88 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
89 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
90 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
91 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
92 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
93
94 if (!cap_interrupt_level) {
95 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
96 "VM to stall at times!\n");
97 }
98
99 return 0;
100 }
101
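/* Tell KVM which PVR to present to the guest (Book3S only; BookE keeps
 * the host's native PVR for now). */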
102 static int kvm_arch_sync_sregs(CPUPPCState *cenv)
103 {
104 struct kvm_sregs sregs;
105 int ret;
106
107 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
108 /* What we're really trying to say is "if we're on BookE, we use
109 the native PVR for now". This is the only sane way to check
110 it, though it may mislead users into thinking they can run
111 BookE guests on Book3S. Let's hope nobody tries :) */
112 return 0;
113 } else {
114 if (!cap_segstate) {
115 fprintf(stderr, "kvm error: missing PVR setting capability\n");
116 return -ENOSYS;
117 }
118 }
119
120 ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
121 if (ret) {
122 return ret;
123 }
124
125 sregs.pvr = cenv->spr[SPR_PVR];
126 return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
127 }
128
129 /* Set up a shared TLB array with KVM */
130 static int kvm_booke206_tlb_init(CPUPPCState *env)
131 {
132 struct kvm_book3e_206_tlb_params params = {};
133 struct kvm_config_tlb cfg = {};
134 struct kvm_enable_cap encap = {};
135 unsigned int entries = 0;
136 int ret, i;
137
138 if (!kvm_enabled() ||
139 !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
140 return 0;
141 }
142
143 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
144
145 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
146 params.tlb_sizes[i] = booke206_tlb_size(env, i);
147 params.tlb_ways[i] = booke206_tlb_ways(env, i);
148 entries += params.tlb_sizes[i];
149 }
150
151 assert(entries == env->nb_tlb);
152 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
153
154 env->tlb_dirty = true;
155
156 cfg.array = (uintptr_t)env->tlb.tlbm;
157 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
158 cfg.params = (uintptr_t)&params;
159 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
160
161 encap.cap = KVM_CAP_SW_TLB;
162 encap.args[0] = (uintptr_t)&cfg;
163
164 ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &encap);
165 if (ret < 0) {
166 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
167 __func__, strerror(-ret));
168 return ret;
169 }
170
171 env->kvm_sw_tlb = true;
172 return 0;
173 }
174
175
176 #if defined(TARGET_PPC64)
177 static void kvm_get_fallback_smmu_info(CPUPPCState *env,
178 struct kvm_ppc_smmu_info *info)
179 {
180 memset(info, 0, sizeof(*info));
181
182 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
183 * need to "guess" what the supported page sizes are.
184 *
185 * For that to work we make a few assumptions:
186 *
187 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
188 * KVM which only supports 4K and 16M pages, but supports them
189 * regardless of the backing store characteristics. We also don't
190 * support 1T segments.
191 *
192 * This is safe because if HV KVM ever supports that capability or PR
193 * KVM grows support for more page/segment sizes, those versions
194 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
195 * will not hit this fallback.
196 *
197 * - Else we are running HV KVM. This means we only support page
198 * sizes that fit in the backing store. Additionally we only
199 * advertise 64K pages if the processor is ARCH 2.06 and we assume
200 * P7 encodings for the SLB and hash table. Here too, we assume
201 * support for any newer processor will mean a kernel that
202 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
203 * this fallback.
204 */
205 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
206 /* No flags */
207 info->flags = 0;
208 info->slb_size = 64;
209
210 /* Standard 4k base page size segment */
211 info->sps[0].page_shift = 12;
212 info->sps[0].slb_enc = 0;
213 info->sps[0].enc[0].page_shift = 12;
214 info->sps[0].enc[0].pte_enc = 0;
215
216 /* Standard 16M large page size segment */
217 info->sps[1].page_shift = 24;
218 info->sps[1].slb_enc = SLB_VSID_L;
219 info->sps[1].enc[0].page_shift = 24;
220 info->sps[1].enc[0].pte_enc = 0;
221 } else {
222 int i = 0;
223
224 /* HV KVM has backing store size restrictions */
225 info->flags = KVM_PPC_PAGE_SIZES_REAL;
226
227 if (env->mmu_model & POWERPC_MMU_1TSEG) {
228 info->flags |= KVM_PPC_1T_SEGMENTS;
229 }
230
231 if (env->mmu_model == POWERPC_MMU_2_06) {
232 info->slb_size = 32;
233 } else {
234 info->slb_size = 64;
235 }
236
237 /* Standard 4k base page size segment */
238 info->sps[i].page_shift = 12;
239 info->sps[i].slb_enc = 0;
240 info->sps[i].enc[0].page_shift = 12;
241 info->sps[i].enc[0].pte_enc = 0;
242 i++;
243
244 /* 64K on MMU 2.06 */
245 if (env->mmu_model == POWERPC_MMU_2_06) {
246 info->sps[i].page_shift = 16;
247 info->sps[i].slb_enc = 0x110;
248 info->sps[i].enc[0].page_shift = 16;
249 info->sps[i].enc[0].pte_enc = 1;
250 i++;
251 }
252
253 /* Standard 16M large page size segment */
254 info->sps[i].page_shift = 24;
255 info->sps[i].slb_enc = SLB_VSID_L;
256 info->sps[i].enc[0].page_shift = 24;
257 info->sps[i].enc[0].pte_enc = 0;
258 }
259 }
260
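/* Ask the kernel which segment/page sizes the MMU supports, falling back
 * to conservative guesses on kernels without KVM_PPC_GET_SMMU_INFO. */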
261 static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
262 {
263 int ret;
264
265 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
266 ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
267 if (ret == 0) {
268 return;
269 }
270 }
271
272 kvm_get_fallback_smmu_info(env, info);
273 }
274
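/* Page size backing guest RAM: the hugepage size if -mem-path points at
 * hugetlbfs, otherwise the normal host page size. */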
275 static long getrampagesize(void)
276 {
277 struct statfs fs;
278 int ret;
279
280 if (!mem_path) {
281 /* guest RAM is backed by normal anonymous pages */
282 return getpagesize();
283 }
284
285 do {
286 ret = statfs(mem_path, &fs);
287 } while (ret != 0 && errno == EINTR);
288
289 if (ret != 0) {
290 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
291 strerror(errno));
292 exit(1);
293 }
294
295 #define HUGETLBFS_MAGIC 0x958458f6
296
297 if (fs.f_type != HUGETLBFS_MAGIC) {
298 /* Explicit mempath, but it's ordinary pages */
299 return getpagesize();
300 }
301
302 /* It's hugetlbfs; return the huge page size */
303 return fs.f_bsize;
304 }
305
306 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
307 {
308 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
309 return true;
310 }
311
312 return (1ul << shift) <= rampgsize;
313 }
314
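/* Restrict env->sps to the page sizes that both the kernel and the RAM
 * backing store can provide (64-bit server MMUs only). */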
315 static void kvm_fixup_page_sizes(CPUPPCState *env)
316 {
317 static struct kvm_ppc_smmu_info smmu_info;
318 static bool has_smmu_info;
319 long rampagesize;
320 int iq, ik, jq, jk;
321
322 /* We only handle page sizes for 64-bit server guests for now */
323 if (!(env->mmu_model & POWERPC_MMU_64)) {
324 return;
325 }
326
327 /* Collect MMU info from the kernel if we haven't already */
328 if (!has_smmu_info) {
329 kvm_get_smmu_info(env, &smmu_info);
330 has_smmu_info = true;
331 }
332
333 rampagesize = getrampagesize();
334
335 /* Convert to QEMU form */
336 memset(&env->sps, 0, sizeof(env->sps));
337
338 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
339 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
340 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
341
342 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
343 ksps->page_shift)) {
344 continue;
345 }
346 qsps->page_shift = ksps->page_shift;
347 qsps->slb_enc = ksps->slb_enc;
348 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
349 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
350 ksps->enc[jk].page_shift)) {
351 continue;
352 }
353 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
354 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
355 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
356 break;
357 }
358 }
359 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
360 break;
361 }
362 }
363 env->slb_nr = smmu_info.slb_size;
364 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
365 env->mmu_model |= POWERPC_MMU_1TSEG;
366 } else {
367 env->mmu_model &= ~POWERPC_MMU_1TSEG;
368 }
369 }
370 #else /* defined (TARGET_PPC64) */
371
372 static inline void kvm_fixup_page_sizes(CPUPPCState *env)
373 {
374 }
375
376 #endif /* !defined (TARGET_PPC64) */
377
378 int kvm_arch_init_vcpu(CPUPPCState *cenv)
379 {
380 PowerPCCPU *cpu = ppc_env_get_cpu(cenv);
381 int ret;
382
383 /* Gather server mmu info from KVM and update the CPU state */
384 kvm_fixup_page_sizes(cenv);
385
386 /* Synchronize sregs with kvm */
387 ret = kvm_arch_sync_sregs(cenv);
388 if (ret) {
389 return ret;
390 }
391
392 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);
393
394 /* Some targets support access to KVM's guest TLB. */
395 switch (cenv->mmu_model) {
396 case POWERPC_MMU_BOOKE206:
397 ret = kvm_booke206_tlb_init(cenv);
398 break;
399 default:
400 break;
401 }
402
403 return ret;
404 }
405
406 void kvm_arch_reset_vcpu(CPUPPCState *env)
407 {
408 }
409
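/* Push QEMU's shadow copy of the guest TLB back into KVM by marking every
 * entry dirty. */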
410 static void kvm_sw_tlb_put(CPUPPCState *env)
411 {
412 struct kvm_dirty_tlb dirty_tlb;
413 unsigned char *bitmap;
414 int ret;
415
416 if (!env->kvm_sw_tlb) {
417 return;
418 }
419
420 bitmap = g_malloc((env->nb_tlb + 7) / 8);
421 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
422
423 dirty_tlb.bitmap = (uintptr_t)bitmap;
424 dirty_tlb.num_dirty = env->nb_tlb;
425
426 ret = kvm_vcpu_ioctl(env, KVM_DIRTY_TLB, &dirty_tlb);
427 if (ret) {
428 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
429 __func__, strerror(-ret));
430 }
431
432 g_free(bitmap);
433 }
434
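/* Copy QEMU's vcpu state into KVM: GPRs and common SPRs always; SLB/SR/BAT
 * state and HIOR only on full syncs (level >= KVM_PUT_RESET_STATE) when the
 * kernel supports them. */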
435 int kvm_arch_put_registers(CPUPPCState *env, int level)
436 {
437 struct kvm_regs regs;
438 int ret;
439 int i;
440
441 ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
442 if (ret < 0)
443 return ret;
444
445 regs.ctr = env->ctr;
446 regs.lr = env->lr;
447 regs.xer = env->xer;
448 regs.msr = env->msr;
449 regs.pc = env->nip;
450
451 regs.srr0 = env->spr[SPR_SRR0];
452 regs.srr1 = env->spr[SPR_SRR1];
453
454 regs.sprg0 = env->spr[SPR_SPRG0];
455 regs.sprg1 = env->spr[SPR_SPRG1];
456 regs.sprg2 = env->spr[SPR_SPRG2];
457 regs.sprg3 = env->spr[SPR_SPRG3];
458 regs.sprg4 = env->spr[SPR_SPRG4];
459 regs.sprg5 = env->spr[SPR_SPRG5];
460 regs.sprg6 = env->spr[SPR_SPRG6];
461 regs.sprg7 = env->spr[SPR_SPRG7];
462
463 regs.pid = env->spr[SPR_BOOKE_PID];
464
465 for (i = 0; i < 32; i++)
466 regs.gpr[i] = env->gpr[i];
467
468 ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
469 if (ret < 0)
470 return ret;
471
472 if (env->tlb_dirty) {
473 kvm_sw_tlb_put(env);
474 env->tlb_dirty = false;
475 }
476
477 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
478 struct kvm_sregs sregs;
479
480 sregs.pvr = env->spr[SPR_PVR];
481
482 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
483
484 /* Sync SLB */
485 #ifdef TARGET_PPC64
486 for (i = 0; i < 64; i++) {
487 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
488 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
489 }
490 #endif
491
492 /* Sync SRs */
493 for (i = 0; i < 16; i++) {
494 sregs.u.s.ppc32.sr[i] = env->sr[i];
495 }
496
497 /* Sync BATs */
498 for (i = 0; i < 8; i++) {
499 /* Beware. We have to swap upper and lower bits here */
500 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
501 | env->DBAT[1][i];
502 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
503 | env->IBAT[1][i];
504 }
505
506 ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
507 if (ret) {
508 return ret;
509 }
510 }
511
512 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
513 uint64_t hior = env->spr[SPR_HIOR];
514 struct kvm_one_reg reg = {
515 .id = KVM_REG_PPC_HIOR,
516 .addr = (uintptr_t) &hior,
517 };
518
519 ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
520 if (ret) {
521 return ret;
522 }
523 }
524
525 return ret;
526 }
527
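/* Read the vcpu state back from KVM, including BookE or Book3S sregs
 * depending on which capability the kernel advertises. */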
528 int kvm_arch_get_registers(CPUPPCState *env)
529 {
530 struct kvm_regs regs;
531 struct kvm_sregs sregs;
532 uint32_t cr;
533 int i, ret;
534
535 ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
536 if (ret < 0)
537 return ret;
538
539 cr = regs.cr;
540 for (i = 7; i >= 0; i--) {
541 env->crf[i] = cr & 15;
542 cr >>= 4;
543 }
544
545 env->ctr = regs.ctr;
546 env->lr = regs.lr;
547 env->xer = regs.xer;
548 env->msr = regs.msr;
549 env->nip = regs.pc;
550
551 env->spr[SPR_SRR0] = regs.srr0;
552 env->spr[SPR_SRR1] = regs.srr1;
553
554 env->spr[SPR_SPRG0] = regs.sprg0;
555 env->spr[SPR_SPRG1] = regs.sprg1;
556 env->spr[SPR_SPRG2] = regs.sprg2;
557 env->spr[SPR_SPRG3] = regs.sprg3;
558 env->spr[SPR_SPRG4] = regs.sprg4;
559 env->spr[SPR_SPRG5] = regs.sprg5;
560 env->spr[SPR_SPRG6] = regs.sprg6;
561 env->spr[SPR_SPRG7] = regs.sprg7;
562
563 env->spr[SPR_BOOKE_PID] = regs.pid;
564
565 for (i = 0; i < 32; i++)
566 env->gpr[i] = regs.gpr[i];
567
568 if (cap_booke_sregs) {
569 ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
570 if (ret < 0) {
571 return ret;
572 }
573
574 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
575 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
576 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
577 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
578 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
579 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
580 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
581 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
582 env->spr[SPR_DECR] = sregs.u.e.dec;
583 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
584 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
585 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
586 }
587
588 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
589 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
590 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
591 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
592 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
593 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
594 }
595
596 if (sregs.u.e.features & KVM_SREGS_E_64) {
597 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
598 }
599
600 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
601 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
602 }
603
604 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
605 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
606 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
607 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
608 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
609 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
610 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
611 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
612 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
613 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
614 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
615 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
616 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
617 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
618 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
619 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
620 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
621
622 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
623 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
624 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
625 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
626 }
627
628 if (sregs.u.e.features & KVM_SREGS_E_PM) {
629 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
630 }
631
632 if (sregs.u.e.features & KVM_SREGS_E_PC) {
633 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
634 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
635 }
636 }
637
638 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
639 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
640 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
641 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
642 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
643 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
644 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
645 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
646 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
647 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
648 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
649 }
650
651 if (sregs.u.e.features & KVM_SREGS_EXP) {
652 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
653 }
654
655 if (sregs.u.e.features & KVM_SREGS_E_PD) {
656 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
657 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
658 }
659
660 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
661 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
662 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
663 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
664
665 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
666 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
667 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
668 }
669 }
670 }
671
672 if (cap_segstate) {
673 ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
674 if (ret < 0) {
675 return ret;
676 }
677
678 ppc_store_sdr1(env, sregs.u.s.sdr1);
679
680 /* Sync SLB */
681 #ifdef TARGET_PPC64
682 for (i = 0; i < 64; i++) {
683 ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
684 sregs.u.s.ppc64.slb[i].slbv);
685 }
686 #endif
687
688 /* Sync SRs */
689 for (i = 0; i < 16; i++) {
690 env->sr[i] = sregs.u.s.ppc32.sr[i];
691 }
692
693 /* Sync BATs */
694 for (i = 0; i < 8; i++) {
695 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
696 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
697 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
698 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
699 }
700 }
701
702 return 0;
703 }
704
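/* Forward a change on the external interrupt line to KVM via KVM_INTERRUPT;
 * other interrupt sources are not handled here. */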
705 int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
706 {
707 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
708
709 if (irq != PPC_INTERRUPT_EXT) {
710 return 0;
711 }
712
713 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
714 return 0;
715 }
716
717 kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);
718
719 return 0;
720 }
721
722 #if defined(TARGET_PPCEMB)
723 #define PPC_INPUT_INT PPC40x_INPUT_INT
724 #elif defined(TARGET_PPC64)
725 #define PPC_INPUT_INT PPC970_INPUT_INT
726 #else
727 #define PPC_INPUT_INT PPC6xx_INPUT_INT
728 #endif
729
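/* Called before entering KVM_RUN. On kernels without level-irq support,
 * inject any pending external interrupt by hand and arm idle_timer so a
 * swallowed interrupt gets retried. */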
730 void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
731 {
732 int r;
733 unsigned irq;
734
735 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
736 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
737 if (!cap_interrupt_level &&
738 run->ready_for_interrupt_injection &&
739 (env->interrupt_request & CPU_INTERRUPT_HARD) &&
740 (env->irq_input_state & (1<<PPC_INPUT_INT)))
741 {
742 /* For now KVM disregards the 'irq' argument. However, in the
743 * future KVM could cache it in-kernel to avoid a heavyweight exit
744 * when reading the UIC.
745 */
746 irq = KVM_INTERRUPT_SET;
747
748 dprintf("injected interrupt %d\n", irq);
749 r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
750 if (r < 0)
751 printf("cpu %d fail inject %x\n", env->cpu_index, irq);
752
753 /* Always wake up soon in case the interrupt was level based */
754 qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
755 (get_ticks_per_sec() / 50));
756 }
757
758 /* We don't know if there are more interrupts pending after this. However,
759 * the guest will return to userspace in the course of handling this one
760 * anyway, so we will get a chance to deliver the rest. */
761 }
762
763 void kvm_arch_post_run(CPUPPCState *env, struct kvm_run *run)
764 {
765 }
766
767 int kvm_arch_process_async_events(CPUPPCState *env)
768 {
769 return env->halted;
770 }
771
772 static int kvmppc_handle_halt(CPUPPCState *env)
773 {
774 if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
775 env->halted = 1;
776 env->exception_index = EXCP_HLT;
777 }
778
779 return 0;
780 }
781
782 /* map dcr access to existing qemu dcr emulation */
783 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
784 {
785 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
786 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
787
788 return 0;
789 }
790
791 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
792 {
793 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
794 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
795
796 return 0;
797 }
798
799 int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run)
800 {
801 int ret;
802
803 switch (run->exit_reason) {
804 case KVM_EXIT_DCR:
805 if (run->dcr.is_write) {
806 dprintf("handle dcr write\n");
807 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
808 } else {
809 dprintf("handle dcr read\n");
810 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
811 }
812 break;
813 case KVM_EXIT_HLT:
814 dprintf("handle halt\n");
815 ret = kvmppc_handle_halt(env);
816 break;
817 #ifdef CONFIG_PSERIES
818 case KVM_EXIT_PAPR_HCALL:
819 dprintf("handle PAPR hypercall\n");
820 run->papr_hcall.ret = spapr_hypercall(env, run->papr_hcall.nr,
821 run->papr_hcall.args);
822 ret = 0;
823 break;
824 #endif
825 default:
826 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
827 ret = -1;
828 break;
829 }
830
831 return ret;
832 }
833
834 static int read_cpuinfo(const char *field, char *value, int len)
835 {
836 FILE *f;
837 int ret = -1;
838 int field_len = strlen(field);
839 char line[512];
840
841 f = fopen("/proc/cpuinfo", "r");
842 if (!f) {
843 return -1;
844 }
845
846 do {
847 if (!fgets(line, sizeof(line), f)) {
848 break;
849 }
850 if (!strncmp(line, field, field_len)) {
851 pstrcpy(value, len, line);
852 ret = 0;
853 break;
854 }
855 } while (*line);
856
857 fclose(f);
858
859 return ret;
860 }
861
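/* Host timebase frequency, parsed from /proc/cpuinfo; falls back to
 * get_ticks_per_sec() if it can't be determined. */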
862 uint32_t kvmppc_get_tbfreq(void)
863 {
864 char line[512];
865 char *ns;
866 uint32_t retval = get_ticks_per_sec();
867
868 if (read_cpuinfo("timebase", line, sizeof(line))) {
869 return retval;
870 }
871
872 if (!(ns = strchr(line, ':'))) {
873 return retval;
874 }
875
876 ns++;
877
878 retval = atoi(ns);
879 return retval;
880 }
881
882 /* Try to find a device tree node for a CPU with clock-frequency property */
883 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
884 {
885 struct dirent *dirp;
886 DIR *dp;
887
888 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
889 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
890 return -1;
891 }
892
893 buf[0] = '\0';
894 while ((dirp = readdir(dp)) != NULL) {
895 FILE *f;
896 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
897 dirp->d_name);
898 f = fopen(buf, "r");
899 if (f) {
900 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
901 fclose(f);
902 break;
903 }
904 buf[0] = '\0';
905 }
906 closedir(dp);
907 if (buf[0] == '\0') {
908 printf("Unknown host!\n");
909 return -1;
910 }
911
912 return 0;
913 }
914
915 /* Read a CPU node property from the host device tree that's a single
916 * integer (32-bit or 64-bit). Returns -1 if the property can't be
917 * found or opened, and 0 if the value isn't in a recognized
918 * format. */
919 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
920 {
921 char buf[PATH_MAX];
922 union {
923 uint32_t v32;
924 uint64_t v64;
925 } u;
926 FILE *f;
927 int len;
928
929 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
930 return -1;
931 }
932
933 strncat(buf, "/", sizeof(buf) - strlen(buf) - 1);
934 strncat(buf, propname, sizeof(buf) - strlen(buf) - 1);
935
936 f = fopen(buf, "rb");
937 if (!f) {
938 return -1;
939 }
940
941 len = fread(&u, 1, sizeof(u), f);
942 fclose(f);
943 switch (len) {
944 case 4:
945 /* property is a 32-bit quantity */
946 return be32_to_cpu(u.v32);
947 case 8:
948 return be64_to_cpu(u.v64);
949 }
950
951 return 0;
952 }
953
954 uint64_t kvmppc_get_clockfreq(void)
955 {
956 return kvmppc_read_int_cpu_dt("clock-frequency");
957 }
958
959 uint32_t kvmppc_get_vmx(void)
960 {
961 return kvmppc_read_int_cpu_dt("ibm,vmx");
962 }
963
964 uint32_t kvmppc_get_dfp(void)
965 {
966 return kvmppc_read_int_cpu_dt("ibm,dfp");
967 }
968
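/* Fill buf with the hypercall instruction sequence the guest should use
 * (from KVM_PPC_GET_PVINFO), or with a stub that always fails (li r3,-1)
 * if the kernel can't tell us. */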
969 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
970 {
971 uint32_t *hc = (uint32_t*)buf;
972
973 struct kvm_ppc_pvinfo pvinfo;
974
975 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
976 !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
977 memcpy(buf, pvinfo.hcall, buf_len);
978
979 return 0;
980 }
981
982 /*
983 * Fallback to always fail hypercalls:
984 *
985 * li r3, -1
986 * nop
987 * nop
988 * nop
989 */
990
991 hc[0] = 0x3860ffff;
992 hc[1] = 0x60000000;
993 hc[2] = 0x60000000;
994 hc[3] = 0x60000000;
995
996 return 0;
997 }
998
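/* Put the vcpu into PAPR (pSeries) mode via KVM_ENABLE_CAP; aborts if the
 * kernel lacks KVM_CAP_PPC_PAPR. */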
999 void kvmppc_set_papr(CPUPPCState *env)
1000 {
1001 struct kvm_enable_cap cap = {};
1002 int ret;
1003
1004 cap.cap = KVM_CAP_PPC_PAPR;
1005 ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);
1006
1007 if (ret) {
1008 cpu_abort(env, "This KVM version does not support PAPR\n");
1009 }
1010 }
1011
1012 int kvmppc_smt_threads(void)
1013 {
1014 return cap_ppc_smt ? cap_ppc_smt : 1;
1015 }
1016
1017 #ifdef TARGET_PPC64
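/* Allocate and map a contiguous Real Mode Area via KVM_ALLOCATE_RMA on hosts
 * that require one (cap_ppc_rma == 2). Returns the RMA size, 0 when no RMA
 * needs to be allocated here, or -1 on failure. */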
1018 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1019 {
1020 void *rma;
1021 off_t size;
1022 int fd;
1023 struct kvm_allocate_rma ret;
1024 MemoryRegion *rma_region;
1025
1026 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1027 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1028 * not necessary on this hardware
1029 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1030 *
1031 * FIXME: We should allow the user to force contiguous RMA
1032 * allocation in the cap_ppc_rma==1 case.
1033 */
1034 if (cap_ppc_rma < 2) {
1035 return 0;
1036 }
1037
1038 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1039 if (fd < 0) {
1040 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1041 strerror(errno));
1042 return -1;
1043 }
1044
1045 size = MIN(ret.rma_size, 256ul << 20);
1046
1047 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1048 if (rma == MAP_FAILED) {
1049 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1050 return -1;
1051 }
1052
1053 rma_region = g_new(MemoryRegion, 1);
1054 memory_region_init_ram_ptr(rma_region, name, size, rma);
1055 vmstate_register_ram_global(rma_region);
1056 memory_region_add_subregion(sysmem, 0, rma_region);
1057
1058 return size;
1059 }
1060
1061 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1062 {
1063 if (cap_ppc_rma >= 2) {
1064 return current_size;
1065 }
1066 return MIN(current_size,
1067 getrampagesize() << (hash_shift - 7));
1068 }
1069 #endif
1070
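/* Create an in-kernel TCE table for the given LIOBN and mmap it into QEMU;
 * returns NULL (with *pfd = -1) when that isn't possible, so callers fall
 * back to a userspace table. */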
1071 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1072 {
1073 struct kvm_create_spapr_tce args = {
1074 .liobn = liobn,
1075 .window_size = window_size,
1076 };
1077 long len;
1078 int fd;
1079 void *table;
1080
1081 /* Must set fd to -1 so we don't try to munmap when called for
1082 * destroying the table, which the upper layers -will- do
1083 */
1084 *pfd = -1;
1085 if (!cap_spapr_tce) {
1086 return NULL;
1087 }
1088
1089 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1090 if (fd < 0) {
1091 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1092 liobn);
1093 return NULL;
1094 }
1095
1096 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1097 /* FIXME: round this up to page size */
1098
1099 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1100 if (table == MAP_FAILED) {
1101 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1102 liobn);
1103 close(fd);
1104 return NULL;
1105 }
1106
1107 *pfd = fd;
1108 return table;
1109 }
1110
1111 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1112 {
1113 long len;
1114
1115 if (fd < 0) {
1116 return -1;
1117 }
1118
1119 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1120 if ((munmap(table, len) < 0) ||
1121 (close(fd) < 0)) {
1122 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
1123 strerror(errno));
1124 /* Leak the table */
1125 }
1126
1127 return 0;
1128 }
1129
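/* Ask the kernel to (re)allocate the guest hash page table. Returns the htab
 * order actually used, or 0 when QEMU should allocate the htab itself. */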
1130 int kvmppc_reset_htab(int shift_hint)
1131 {
1132 uint32_t shift = shift_hint;
1133
1134 if (!kvm_enabled()) {
1135 /* Full emulation, tell caller to allocate htab itself */
1136 return 0;
1137 }
1138 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1139 int ret;
1140 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1141 if (ret == -ENOTTY) {
1142 /* At least some versions of PR KVM advertise the
1143 * capability, but don't implement the ioctl(). Oops.
1144 * Return 0 so that we allocate the htab in qemu, as is
1145 * correct for PR. */
1146 return 0;
1147 } else if (ret < 0) {
1148 return ret;
1149 }
1150 return shift;
1151 }
1152
1153 /* We have a kernel that predates the htab reset calls. For PR
1154 * KVM, we need to allocate the htab ourselves; an HV KVM of
1155 * this era will have allocated a 16MB fixed-size hash table
1156 * already. Kernels of this era have the GET_PVINFO capability
1157 * only on PR, so we use this hack to determine the right
1158 * answer. */
1159 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1160 /* PR - tell caller to allocate htab */
1161 return 0;
1162 } else {
1163 /* HV - assume 16MB kernel allocated htab */
1164 return 24;
1165 }
1166 }
1167
1168 static inline uint32_t mfpvr(void)
1169 {
1170 uint32_t pvr;
1171
1172 asm ("mfpvr %0"
1173 : "=r"(pvr));
1174 return pvr;
1175 }
1176
1177 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
1178 {
1179 if (on) {
1180 *word |= flags;
1181 } else {
1182 *word &= ~flags;
1183 }
1184 }
1185
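/* Build a CPU definition describing the host: start from the PVR-matched
 * model, then fix up Altivec/VSX and DFP availability from the device tree. */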
1186 const ppc_def_t *kvmppc_host_cpu_def(void)
1187 {
1188 uint32_t host_pvr = mfpvr();
1189 const ppc_def_t *base_spec;
1190 ppc_def_t *spec;
1191 uint32_t vmx = kvmppc_get_vmx();
1192 uint32_t dfp = kvmppc_get_dfp();
1193
1194 base_spec = ppc_find_by_pvr(host_pvr);
1195
1196 spec = g_malloc0(sizeof(*spec));
1197 memcpy(spec, base_spec, sizeof(*spec));
1198
1199 /* Now fix up the spec with information we can query from the host */
1200
1201 if (vmx != -1) {
1202 /* Only override when we know what the host supports */
1203 alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
1204 alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
1205 }
1206 if (dfp != -1) {
1207 /* Only override when we know what the host supports */
1208 alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
1209 }
1210
1211 return spec;
1212 }
1213
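/* Renumber cpu_index so that each core occupies a full block of
 * kvmppc_smt_threads() vcpu ids, as the kernel expects. */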
1214 int kvmppc_fixup_cpu(CPUPPCState *env)
1215 {
1216 int smt;
1217
1218 /* Adjust cpu index for SMT */
1219 smt = kvmppc_smt_threads();
1220 env->cpu_index = (env->cpu_index / smp_threads) * smt
1221 + (env->cpu_index % smp_threads);
1222
1223 return 0;
1224 }
1225
1226
1227 bool kvm_arch_stop_on_emulation_error(CPUPPCState *env)
1228 {
1229 return true;
1230 }
1231
1232 int kvm_arch_on_sigbus_vcpu(CPUPPCState *env, int code, void *addr)
1233 {
1234 return 1;
1235 }
1236
1237 int kvm_arch_on_sigbus(int code, void *addr)
1238 {
1239 return 1;
1240 }