target-ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include <dirent.h>
  18 #include <sys/types.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/mman.h>
  21 #include <sys/vfs.h>
  22
  23 #include <linux/kvm.h>
  24
  25 #include "qemu-common.h"
  26 #include "qemu-timer.h"
  27 #include "sysemu.h"
  28 #include "kvm.h"
  29 #include "kvm_ppc.h"
  30 #include "cpu.h"
  31 #include "cpus.h"
  32 #include "device_tree.h"
  33 #include "hw/sysbus.h"
  34 #include "hw/spapr.h"
  35
  36 #include "hw/sysbus.h"
  37 #include "hw/spapr.h"
  38 #include "hw/spapr_vio.h"
  39
  40 //#define DEBUG_KVM
  41
/* Debug tracing helper: with DEBUG_KVM defined, dprintf() formats its
 * arguments to stderr; otherwise it expands to an empty statement and its
 * arguments are not evaluated. */
#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif
  49
  50 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  51
/* The table contains only the KVM_CAP_LAST_INFO terminator entry. */
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

/* Results of kvm_check_extension() for each capability, cached by
 * kvm_arch_init() and consulted by the rest of this file. */
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_hior;
  64
  65 /* XXX We have a race condition where we actually have a level triggered
  66  *     interrupt, but the infrastructure can't expose that yet, so the guest
  67  *     takes but ignores it, goes to sleep and never gets notified that there's
  68  *     still an interrupt pending.
  69  *
  70  *     As a quick workaround, let's just wake up again 20 ms after we injected
  71  *     an interrupt. That way we can assure that we're always reinjecting
  72  *     interrupts in case the guest swallowed them.
  73  */
  74 static QEMUTimer *idle_timer;
  75
  76 static void kvm_kick_cpu(void *opaque)
  77 {
  78     PowerPCCPU *cpu = opaque;
  79     CPUPPCState *env = &cpu->env;
  80
  81     qemu_cpu_kick(env);
  82 }
  83
  84 int kvm_arch_init(KVMState *s)
  85 {
  86     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
  87     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
  88     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
  89     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
  90     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
  91     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
  92     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
  93     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
  94
  95     if (!cap_interrupt_level) {
  96         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
  97                         "VM to stall at times!\n");
  98     }
  99
 100     return 0;
 101 }
 102
 103 static int kvm_arch_sync_sregs(CPUPPCState *cenv)
 104 {
 105     struct kvm_sregs sregs;
 106     int ret;
 107
 108     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 109         /* What we're really trying to say is "if we're on BookE, we use
 110            the native PVR for now". This is the only sane way to check
 111            it though, so we potentially confuse users that they can run
 112            BookE guests on BookS. Let's hope nobody dares enough :) */
 113         return 0;
 114     } else {
 115         if (!cap_segstate) {
 116             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 117             return -ENOSYS;
 118         }
 119     }
 120
 121     ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
 122     if (ret) {
 123         return ret;
 124     }
 125
 126     sregs.pvr = cenv->spr[SPR_PVR];
 127     return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
 128 }
 129
 130 /* Set up a shared TLB array with KVM */
 131 static int kvm_booke206_tlb_init(CPUPPCState *env)
 132 {
 133     struct kvm_book3e_206_tlb_params params = {};
 134     struct kvm_config_tlb cfg = {};
 135     struct kvm_enable_cap encap = {};
 136     unsigned int entries = 0;
 137     int ret, i;
 138
 139     if (!kvm_enabled() ||
 140         !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
 141         return 0;
 142     }
 143
 144     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 145
 146     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 147         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 148         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 149         entries += params.tlb_sizes[i];
 150     }
 151
 152     assert(entries == env->nb_tlb);
 153     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 154
 155     env->tlb_dirty = true;
 156
 157     cfg.array = (uintptr_t)env->tlb.tlbm;
 158     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 159     cfg.params = (uintptr_t)&params;
 160     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 161
 162     encap.cap = KVM_CAP_SW_TLB;
 163     encap.args[0] = (uintptr_t)&cfg;
 164
 165     ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &encap);
 166     if (ret < 0) {
 167         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 168                 __func__, strerror(-ret));
 169         return ret;
 170     }
 171
 172     env->kvm_sw_tlb = true;
 173     return 0;
 174 }
 175
 176
 177 #if defined(TARGET_PPC64)
 178 static void kvm_get_fallback_smmu_info(CPUPPCState *env,
 179                                        struct kvm_ppc_smmu_info *info)
 180 {
 181     memset(info, 0, sizeof(*info));
 182
 183     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 184      * need to "guess" what the supported page sizes are.
 185      *
 186      * For that to work we make a few assumptions:
 187      *
 188      * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 189      *   KVM which only supports 4K and 16M pages, but supports them
 190      *   regardless of the backing store characteritics. We also don't
 191      *   support 1T segments.
 192      *
 193      *   This is safe as if HV KVM ever supports that capability or PR
 194      *   KVM grows supports for more page/segment sizes, those versions
 195      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 196      *   will not hit this fallback
 197      *
 198      * - Else we are running HV KVM. This means we only support page
 199      *   sizes that fit in the backing store. Additionally we only
 200      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 201      *   P7 encodings for the SLB and hash table. Here too, we assume
 202      *   support for any newer processor will mean a kernel that
 203      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 204      *   this fallback.
 205      */
 206     if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 207         /* No flags */
 208         info->flags = 0;
 209         info->slb_size = 64;
 210
 211         /* Standard 4k base page size segment */
 212         info->sps[0].page_shift = 12;
 213         info->sps[0].slb_enc = 0;
 214         info->sps[0].enc[0].page_shift = 12;
 215         info->sps[0].enc[0].pte_enc = 0;
 216
 217         /* Standard 16M large page size segment */
 218         info->sps[1].page_shift = 24;
 219         info->sps[1].slb_enc = SLB_VSID_L;
 220         info->sps[1].enc[0].page_shift = 24;
 221         info->sps[1].enc[0].pte_enc = 0;
 222     } else {
 223         int i = 0;
 224
 225         /* HV KVM has backing store size restrictions */
 226         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 227
 228         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 229             info->flags |= KVM_PPC_1T_SEGMENTS;
 230         }
 231
 232         if (env->mmu_model == POWERPC_MMU_2_06) {
 233             info->slb_size = 32;
 234         } else {
 235             info->slb_size = 64;
 236         }
 237
 238         /* Standard 4k base page size segment */
 239         info->sps[i].page_shift = 12;
 240         info->sps[i].slb_enc = 0;
 241         info->sps[i].enc[0].page_shift = 12;
 242         info->sps[i].enc[0].pte_enc = 0;
 243         i++;
 244
 245         /* 64K on MMU 2.06 */
 246         if (env->mmu_model == POWERPC_MMU_2_06) {
 247             info->sps[i].page_shift = 16;
 248             info->sps[i].slb_enc = 0x110;
 249             info->sps[i].enc[0].page_shift = 16;
 250             info->sps[i].enc[0].pte_enc = 1;
 251             i++;
 252         }
 253
 254         /* Standard 16M large page size segment */
 255         info->sps[i].page_shift = 24;
 256         info->sps[i].slb_enc = SLB_VSID_L;
 257         info->sps[i].enc[0].page_shift = 24;
 258         info->sps[i].enc[0].pte_enc = 0;
 259     }
 260 }
 261
 262 static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
 263 {
 264     int ret;
 265
 266     if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 267         ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 268         if (ret == 0) {
 269             return;
 270         }
 271     }
 272
 273     kvm_get_fallback_smmu_info(env, info);
 274 }
 275
 276 static long getrampagesize(void)
 277 {
 278     struct statfs fs;
 279     int ret;
 280
 281     if (!mem_path) {
 282         /* guest RAM is backed by normal anonymous pages */
 283         return getpagesize();
 284     }
 285
 286     do {
 287         ret = statfs(mem_path, &fs);
 288     } while (ret != 0 && errno == EINTR);
 289
 290     if (ret != 0) {
 291         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 292                 strerror(errno));
 293         exit(1);
 294     }
 295
 296 #define HUGETLBFS_MAGIC       0x958458f6
 297
 298     if (fs.f_type != HUGETLBFS_MAGIC) {
 299         /* Explicit mempath, but it's ordinary pages */
 300         return getpagesize();
 301     }
 302
 303     /* It's hugepage, return the huge page size */
 304     return fs.f_bsize;
 305 }
 306
 307 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 308 {
 309     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 310         return true;
 311     }
 312
 313     return (1ul << shift) <= rampgsize;
 314 }
 315
 316 static void kvm_fixup_page_sizes(CPUPPCState *env)
 317 {
 318     static struct kvm_ppc_smmu_info smmu_info;
 319     static bool has_smmu_info;
 320     long rampagesize;
 321     int iq, ik, jq, jk;
 322
 323     /* We only handle page sizes for 64-bit server guests for now */
 324     if (!(env->mmu_model & POWERPC_MMU_64)) {
 325         return;
 326     }
 327
 328     /* Collect MMU info from kernel if not already */
 329     if (!has_smmu_info) {
 330         kvm_get_smmu_info(env, &smmu_info);
 331         has_smmu_info = true;
 332     }
 333
 334     rampagesize = getrampagesize();
 335
 336     /* Convert to QEMU form */
 337     memset(&env->sps, 0, sizeof(env->sps));
 338
 339     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 340         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 341         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 342
 343         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 344                                  ksps->page_shift)) {
 345             continue;
 346         }
 347         qsps->page_shift = ksps->page_shift;
 348         qsps->slb_enc = ksps->slb_enc;
 349         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 350             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 351                                      ksps->enc[jk].page_shift)) {
 352                 continue;
 353             }
 354             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 355             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 356             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 357                 break;
 358             }
 359         }
 360         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 361             break;
 362         }
 363     }
 364     env->slb_nr = smmu_info.slb_size;
 365     if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
 366         env->mmu_model |= POWERPC_MMU_1TSEG;
 367     } else {
 368         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 369     }
 370 }
 371 #else /* defined (TARGET_PPC64) */
 372
 373 static inline void kvm_fixup_page_sizes(CPUPPCState *env)
 374 {
 375 }
 376
 377 #endif /* !defined (TARGET_PPC64) */
 378
 379 int kvm_arch_init_vcpu(CPUPPCState *cenv)
 380 {
 381     PowerPCCPU *cpu = ppc_env_get_cpu(cenv);
 382     int ret;
 383
 384     /* Gather server mmu info from KVM and update the CPU state */
 385     kvm_fixup_page_sizes(cenv);
 386
 387     /* Synchronize sregs with kvm */
 388     ret = kvm_arch_sync_sregs(cenv);
 389     if (ret) {
 390         return ret;
 391     }
 392
 393     idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);
 394
 395     /* Some targets support access to KVM's guest TLB. */
 396     switch (cenv->mmu_model) {
 397     case POWERPC_MMU_BOOKE206:
 398         ret = kvm_booke206_tlb_init(cenv);
 399         break;
 400     default:
 401         break;
 402     }
 403
 404     return ret;
 405 }
 406
 407 void kvm_arch_reset_vcpu(CPUPPCState *env)
 408 {
 409 }
 410
 411 static void kvm_sw_tlb_put(CPUPPCState *env)
 412 {
 413     struct kvm_dirty_tlb dirty_tlb;
 414     unsigned char *bitmap;
 415     int ret;
 416
 417     if (!env->kvm_sw_tlb) {
 418         return;
 419     }
 420
 421     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 422     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 423
 424     dirty_tlb.bitmap = (uintptr_t)bitmap;
 425     dirty_tlb.num_dirty = env->nb_tlb;
 426
 427     ret = kvm_vcpu_ioctl(env, KVM_DIRTY_TLB, &dirty_tlb);
 428     if (ret) {
 429         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 430                 __func__, strerror(-ret));
 431     }
 432
 433     g_free(bitmap);
 434 }
 435
 436 int kvm_arch_put_registers(CPUPPCState *env, int level)
 437 {
 438     struct kvm_regs regs;
 439     int ret;
 440     int i;
 441
 442     ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
 443     if (ret < 0)
 444         return ret;
 445
 446     regs.ctr = env->ctr;
 447     regs.lr  = env->lr;
 448     regs.xer = env->xer;
 449     regs.msr = env->msr;
 450     regs.pc = env->nip;
 451
 452     regs.srr0 = env->spr[SPR_SRR0];
 453     regs.srr1 = env->spr[SPR_SRR1];
 454
 455     regs.sprg0 = env->spr[SPR_SPRG0];
 456     regs.sprg1 = env->spr[SPR_SPRG1];
 457     regs.sprg2 = env->spr[SPR_SPRG2];
 458     regs.sprg3 = env->spr[SPR_SPRG3];
 459     regs.sprg4 = env->spr[SPR_SPRG4];
 460     regs.sprg5 = env->spr[SPR_SPRG5];
 461     regs.sprg6 = env->spr[SPR_SPRG6];
 462     regs.sprg7 = env->spr[SPR_SPRG7];
 463
 464     regs.pid = env->spr[SPR_BOOKE_PID];
 465
 466     for (i = 0;i < 32; i++)
 467         regs.gpr[i] = env->gpr[i];
 468
 469     ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
 470     if (ret < 0)
 471         return ret;
 472
 473     if (env->tlb_dirty) {
 474         kvm_sw_tlb_put(env);
 475         env->tlb_dirty = false;
 476     }
 477
 478     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 479         struct kvm_sregs sregs;
 480
 481         sregs.pvr = env->spr[SPR_PVR];
 482
 483         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 484
 485         /* Sync SLB */
 486 #ifdef TARGET_PPC64
 487         for (i = 0; i < 64; i++) {
 488             sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 489             sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 490         }
 491 #endif
 492
 493         /* Sync SRs */
 494         for (i = 0; i < 16; i++) {
 495             sregs.u.s.ppc32.sr[i] = env->sr[i];
 496         }
 497
 498         /* Sync BATs */
 499         for (i = 0; i < 8; i++) {
 500             /* Beware. We have to swap upper and lower bits here */
 501             sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 502                 | env->DBAT[1][i];
 503             sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 504                 | env->IBAT[1][i];
 505         }
 506
 507         ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
 508         if (ret) {
 509             return ret;
 510         }
 511     }
 512
 513     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 514         uint64_t hior = env->spr[SPR_HIOR];
 515         struct kvm_one_reg reg = {
 516             .id = KVM_REG_PPC_HIOR,
 517             .addr = (uintptr_t) &hior,
 518         };
 519
 520         ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
 521         if (ret) {
 522             return ret;
 523         }
 524     }
 525
 526     return ret;
 527 }
 528
 529 int kvm_arch_get_registers(CPUPPCState *env)
 530 {
 531     struct kvm_regs regs;
 532     struct kvm_sregs sregs;
 533     uint32_t cr;
 534     int i, ret;
 535
 536     ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
 537     if (ret < 0)
 538         return ret;
 539
 540     cr = regs.cr;
 541     for (i = 7; i >= 0; i--) {
 542         env->crf[i] = cr & 15;
 543         cr >>= 4;
 544     }
 545
 546     env->ctr = regs.ctr;
 547     env->lr = regs.lr;
 548     env->xer = regs.xer;
 549     env->msr = regs.msr;
 550     env->nip = regs.pc;
 551
 552     env->spr[SPR_SRR0] = regs.srr0;
 553     env->spr[SPR_SRR1] = regs.srr1;
 554
 555     env->spr[SPR_SPRG0] = regs.sprg0;
 556     env->spr[SPR_SPRG1] = regs.sprg1;
 557     env->spr[SPR_SPRG2] = regs.sprg2;
 558     env->spr[SPR_SPRG3] = regs.sprg3;
 559     env->spr[SPR_SPRG4] = regs.sprg4;
 560     env->spr[SPR_SPRG5] = regs.sprg5;
 561     env->spr[SPR_SPRG6] = regs.sprg6;
 562     env->spr[SPR_SPRG7] = regs.sprg7;
 563
 564     env->spr[SPR_BOOKE_PID] = regs.pid;
 565
 566     for (i = 0;i < 32; i++)
 567         env->gpr[i] = regs.gpr[i];
 568
 569     if (cap_booke_sregs) {
 570         ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
 571         if (ret < 0) {
 572             return ret;
 573         }
 574
 575         if (sregs.u.e.features & KVM_SREGS_E_BASE) {
 576             env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
 577             env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
 578             env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
 579             env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
 580             env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
 581             env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
 582             env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
 583             env->spr[SPR_DECR] = sregs.u.e.dec;
 584             env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
 585             env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
 586             env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
 587         }
 588
 589         if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
 590             env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
 591             env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
 592             env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
 593             env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
 594             env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
 595         }
 596
 597         if (sregs.u.e.features & KVM_SREGS_E_64) {
 598             env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
 599         }
 600
 601         if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
 602             env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
 603         }
 604
 605         if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
 606             env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
 607             env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
 608             env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
 609             env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
 610             env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
 611             env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
 612             env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
 613             env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
 614             env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
 615             env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
 616             env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
 617             env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
 618             env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
 619             env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
 620             env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
 621             env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
 622
 623             if (sregs.u.e.features & KVM_SREGS_E_SPE) {
 624                 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
 625                 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
 626                 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
 627             }
 628
 629             if (sregs.u.e.features & KVM_SREGS_E_PM) {
 630                 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
 631             }
 632
 633             if (sregs.u.e.features & KVM_SREGS_E_PC) {
 634                 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
 635                 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
 636             }
 637         }
 638
 639         if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
 640             env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
 641             env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
 642             env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
 643             env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
 644             env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
 645             env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
 646             env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
 647             env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
 648             env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
 649             env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
 650         }
 651
 652         if (sregs.u.e.features & KVM_SREGS_EXP) {
 653             env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
 654         }
 655
 656         if (sregs.u.e.features & KVM_SREGS_E_PD) {
 657             env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
 658             env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
 659         }
 660
 661         if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
 662             env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
 663             env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
 664             env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
 665
 666             if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
 667                 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
 668                 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
 669             }
 670         }
 671     }
 672
 673     if (cap_segstate) {
 674         ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
 675         if (ret < 0) {
 676             return ret;
 677         }
 678
 679         ppc_store_sdr1(env, sregs.u.s.sdr1);
 680
 681         /* Sync SLB */
 682 #ifdef TARGET_PPC64
 683         for (i = 0; i < 64; i++) {
 684             ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
 685                                sregs.u.s.ppc64.slb[i].slbv);
 686         }
 687 #endif
 688
 689         /* Sync SRs */
 690         for (i = 0; i < 16; i++) {
 691             env->sr[i] = sregs.u.s.ppc32.sr[i];
 692         }
 693
 694         /* Sync BATs */
 695         for (i = 0; i < 8; i++) {
 696             env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
 697             env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
 698             env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
 699             env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
 700         }
 701     }
 702
 703     return 0;
 704 }
 705
 706 int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
 707 {
 708     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
 709
 710     if (irq != PPC_INTERRUPT_EXT) {
 711         return 0;
 712     }
 713
 714     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
 715         return 0;
 716     }
 717
 718     kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);
 719
 720     return 0;
 721 }
 722
/* PPC_INPUT_INT names the bit of env->irq_input_state that
 * kvm_arch_pre_run() tests; which constant it maps to depends on the
 * build target. */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
 730
 731 void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
 732 {
 733     int r;
 734     unsigned irq;
 735
 736     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
 737      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
 738     if (!cap_interrupt_level &&
 739         run->ready_for_interrupt_injection &&
 740         (env->interrupt_request & CPU_INTERRUPT_HARD) &&
 741         (env->irq_input_state & (1<<PPC_INPUT_INT)))
 742     {
 743         /* For now KVM disregards the 'irq' argument. However, in the
 744          * future KVM could cache it in-kernel to avoid a heavyweight exit
 745          * when reading the UIC.
 746          */
 747         irq = KVM_INTERRUPT_SET;
 748
 749         dprintf("injected interrupt %d\n", irq);
 750         r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
 751         if (r < 0)
 752             printf("cpu %d fail inject %x\n", env->cpu_index, irq);
 753
 754         /* Always wake up soon in case the interrupt was level based */
 755         qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
 756                        (get_ticks_per_sec() / 50));
 757     }
 758
 759     /* We don't know if there are more interrupts pending after this. However,
 760      * the guest will return to userspace in the course of handling this one
 761      * anyways, so we will get a chance to deliver the rest. */
 762 }
 763
 764 void kvm_arch_post_run(CPUPPCState *env, struct kvm_run *run)
 765 {
 766 }
 767
 768 int kvm_arch_process_async_events(CPUPPCState *env)
 769 {
 770     return env->halted;
 771 }
 772
 773 static int kvmppc_handle_halt(CPUPPCState *env)
 774 {
 775     if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
 776         env->halted = 1;
 777         env->exception_index = EXCP_HLT;
 778     }
 779
 780     return 0;
 781 }
 782
 783 /* map dcr access to existing qemu dcr emulation */
 784 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
 785 {
 786     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
 787         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
 788
 789     return 0;
 790 }
 791
 792 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
 793 {
 794     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
 795         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
 796
 797     return 0;
 798 }
 799
 800 int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run)
 801 {
 802     int ret;
 803
 804     switch (run->exit_reason) {
 805     case KVM_EXIT_DCR:
 806         if (run->dcr.is_write) {
 807             dprintf("handle dcr write\n");
 808             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
 809         } else {
 810             dprintf("handle dcr read\n");
 811             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
 812         }
 813         break;
 814     case KVM_EXIT_HLT:
 815         dprintf("handle halt\n");
 816         ret = kvmppc_handle_halt(env);
 817         break;
 818 #ifdef CONFIG_PSERIES
 819     case KVM_EXIT_PAPR_HCALL:
 820         dprintf("handle PAPR hypercall\n");
 821         run->papr_hcall.ret = spapr_hypercall(env, run->papr_hcall.nr,
 822                                               run->papr_hcall.args);
 823         ret = 0;
 824         break;
 825 #endif
 826     default:
 827         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
 828         ret = -1;
 829         break;
 830     }
 831
 832     return ret;
 833 }
 834
 835 static int read_cpuinfo(const char *field, char *value, int len)
 836 {
 837     FILE *f;
 838     int ret = -1;
 839     int field_len = strlen(field);
 840     char line[512];
 841
 842     f = fopen("/proc/cpuinfo", "r");
 843     if (!f) {
 844         return -1;
 845     }
 846
 847     do {
 848         if(!fgets(line, sizeof(line), f)) {
 849             break;
 850         }
 851         if (!strncmp(line, field, field_len)) {
 852             pstrcpy(value, len, line);
 853             ret = 0;
 854             break;
 855         }
 856     } while(*line);
 857
 858     fclose(f);
 859
 860     return ret;
 861 }
 862
 863 uint32_t kvmppc_get_tbfreq(void)
 864 {
 865     char line[512];
 866     char *ns;
 867     uint32_t retval = get_ticks_per_sec();
 868
 869     if (read_cpuinfo("timebase", line, sizeof(line))) {
 870         return retval;
 871     }
 872
 873     if (!(ns = strchr(line, ':'))) {
 874         return retval;
 875     }
 876
 877     ns++;
 878
 879     retval = atoi(ns);
 880     return retval;
 881 }
 882
 883 /* Try to find a device tree node for a CPU with clock-frequency property */
 884 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
 885 {
 886     struct dirent *dirp;
 887     DIR *dp;
 888
 889     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
 890         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
 891         return -1;
 892     }
 893
 894     buf[0] = '\0';
 895     while ((dirp = readdir(dp)) != NULL) {
 896         FILE *f;
 897         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
 898                  dirp->d_name);
 899         f = fopen(buf, "r");
 900         if (f) {
 901             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
 902             fclose(f);
 903             break;
 904         }
 905         buf[0] = '\0';
 906     }
 907     closedir(dp);
 908     if (buf[0] == '\0') {
 909         printf("Unknown host!\n");
 910         return -1;
 911     }
 912
 913     return 0;
 914 }
 915
 916 /* Read a CPU node property from the host device tree that's a single
 917  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
 918  * (can't find or open the property, or doesn't understand the
 919  * format) */
 920 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
 921 {
 922     char buf[PATH_MAX];
 923     union {
 924         uint32_t v32;
 925         uint64_t v64;
 926     } u;
 927     FILE *f;
 928     int len;
 929
 930     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
 931         return -1;
 932     }
 933
 934     strncat(buf, "/", sizeof(buf) - strlen(buf));
 935     strncat(buf, propname, sizeof(buf) - strlen(buf));
 936
 937     f = fopen(buf, "rb");
 938     if (!f) {
 939         return -1;
 940     }
 941
 942     len = fread(&u, 1, sizeof(u), f);
 943     fclose(f);
 944     switch (len) {
 945     case 4:
 946         /* property is a 32-bit quantity */
 947         return be32_to_cpu(u.v32);
 948     case 8:
 949         return be64_to_cpu(u.v64);
 950     }
 951
 952     return 0;
 953 }
 954
 955 uint64_t kvmppc_get_clockfreq(void)
 956 {
 957     return kvmppc_read_int_cpu_dt("clock-frequency");
 958 }
 959
 960 uint32_t kvmppc_get_vmx(void)
 961 {
 962     return kvmppc_read_int_cpu_dt("ibm,vmx");
 963 }
 964
 965 uint32_t kvmppc_get_dfp(void)
 966 {
 967     return kvmppc_read_int_cpu_dt("ibm,dfp");
 968 }
 969
 970 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
 971 {
 972     uint32_t *hc = (uint32_t*)buf;
 973
 974     struct kvm_ppc_pvinfo pvinfo;
 975
 976     if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
 977         !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
 978         memcpy(buf, pvinfo.hcall, buf_len);
 979
 980         return 0;
 981     }
 982
 983     /*
 984      * Fallback to always fail hypercalls:
 985      *
 986      *     li r3, -1
 987      *     nop
 988      *     nop
 989      *     nop
 990      */
 991
 992     hc[0] = 0x3860ffff;
 993     hc[1] = 0x60000000;
 994     hc[2] = 0x60000000;
 995     hc[3] = 0x60000000;
 996
 997     return 0;
 998 }
 999
1000 void kvmppc_set_papr(CPUPPCState *env)
1001 {
1002     struct kvm_enable_cap cap = {};
1003     int ret;
1004
1005     cap.cap = KVM_CAP_PPC_PAPR;
1006     ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);
1007
1008     if (ret) {
1009         cpu_abort(env, "This KVM version does not support PAPR\n");
1010     }
1011 }
1012
1013 int kvmppc_smt_threads(void)
1014 {
1015     return cap_ppc_smt ? cap_ppc_smt : 1;
1016 }
1017
1018 #ifdef TARGET_PPC64
1019 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1020 {
1021     void *rma;
1022     off_t size;
1023     int fd;
1024     struct kvm_allocate_rma ret;
1025     MemoryRegion *rma_region;
1026
1027     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1028      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1029      *                      not necessary on this hardware
1030      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1031      *
1032      * FIXME: We should allow the user to force contiguous RMA
1033      * allocation in the cap_ppc_rma==1 case.
1034      */
1035     if (cap_ppc_rma < 2) {
1036         return 0;
1037     }
1038
1039     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1040     if (fd < 0) {
1041         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1042                 strerror(errno));
1043         return -1;
1044     }
1045
1046     size = MIN(ret.rma_size, 256ul << 20);
1047
1048     rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1049     if (rma == MAP_FAILED) {
1050         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1051         return -1;
1052     };
1053
1054     rma_region = g_new(MemoryRegion, 1);
1055     memory_region_init_ram_ptr(rma_region, name, size, rma);
1056     vmstate_register_ram_global(rma_region);
1057     memory_region_add_subregion(sysmem, 0, rma_region);
1058
1059     return size;
1060 }
1061
1062 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1063 {
1064     if (cap_ppc_rma >= 2) {
1065         return current_size;
1066     }
1067     return MIN(current_size,
1068                getrampagesize() << (hash_shift - 7));
1069 }
1070 #endif
1071
1072 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1073 {
1074     struct kvm_create_spapr_tce args = {
1075         .liobn = liobn,
1076         .window_size = window_size,
1077     };
1078     long len;
1079     int fd;
1080     void *table;
1081
1082     /* Must set fd to -1 so we don't try to munmap when called for
1083      * destroying the table, which the upper layers -will- do
1084      */
1085     *pfd = -1;
1086     if (!cap_spapr_tce) {
1087         return NULL;
1088     }
1089
1090     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1091     if (fd < 0) {
1092         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1093                 liobn);
1094         return NULL;
1095     }
1096
1097     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1098     /* FIXME: round this up to page size */
1099
1100     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1101     if (table == MAP_FAILED) {
1102         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1103                 liobn);
1104         close(fd);
1105         return NULL;
1106     }
1107
1108     *pfd = fd;
1109     return table;
1110 }
1111
1112 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1113 {
1114     long len;
1115
1116     if (fd < 0) {
1117         return -1;
1118     }
1119
1120     len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
1121     if ((munmap(table, len) < 0) ||
1122         (close(fd) < 0)) {
1123         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1124                 strerror(errno));
1125         /* Leak the table */
1126     }
1127
1128     return 0;
1129 }
1130
1131 int kvmppc_reset_htab(int shift_hint)
1132 {
1133     uint32_t shift = shift_hint;
1134
1135     if (!kvm_enabled()) {
1136         /* Full emulation, tell caller to allocate htab itself */
1137         return 0;
1138     }
1139     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1140         int ret;
1141         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1142         if (ret == -ENOTTY) {
1143             /* At least some versions of PR KVM advertise the
1144              * capability, but don't implement the ioctl().  Oops.
1145              * Return 0 so that we allocate the htab in qemu, as is
1146              * correct for PR. */
1147             return 0;
1148         } else if (ret < 0) {
1149             return ret;
1150         }
1151         return shift;
1152     }
1153
1154     /* We have a kernel that predates the htab reset calls.  For PR
1155      * KVM, we need to allocate the htab ourselves, for an HV KVM of
1156      * this era, it has allocated a 16MB fixed size hash table
1157      * already.  Kernels of this era have the GET_PVINFO capability
1158      * only on PR, so we use this hack to determine the right
1159      * answer */
1160     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1161         /* PR - tell caller to allocate htab */
1162         return 0;
1163     } else {
1164         /* HV - assume 16MB kernel allocated htab */
1165         return 24;
1166     }
1167 }
1168
1169 static inline uint32_t mfpvr(void)
1170 {
1171     uint32_t pvr;
1172
1173     asm ("mfpvr %0"
1174          : "=r"(pvr));
1175     return pvr;
1176 }
1177
/* Set (on == true) or clear (on == false) every bit of flags in *word,
 * leaving all other bits untouched. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    uint64_t cleared = *word & ~flags;

    *word = on ? (cleared | flags) : cleared;
}
1186
1187 const ppc_def_t *kvmppc_host_cpu_def(void)
1188 {
1189     uint32_t host_pvr = mfpvr();
1190     const ppc_def_t *base_spec;
1191     ppc_def_t *spec;
1192     uint32_t vmx = kvmppc_get_vmx();
1193     uint32_t dfp = kvmppc_get_dfp();
1194
1195     base_spec = ppc_find_by_pvr(host_pvr);
1196
1197     spec = g_malloc0(sizeof(*spec));
1198     memcpy(spec, base_spec, sizeof(*spec));
1199
1200     /* Now fix up the spec with information we can query from the host */
1201
1202     if (vmx != -1) {
1203         /* Only override when we know what the host supports */
1204         alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
1205         alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
1206     }
1207     if (dfp != -1) {
1208         /* Only override when we know what the host supports */
1209         alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
1210     }
1211
1212     return spec;
1213 }
1214
1215 int kvmppc_fixup_cpu(CPUPPCState *env)
1216 {
1217     int smt;
1218
1219     /* Adjust cpu index for SMT */
1220     smt = kvmppc_smt_threads();
1221     env->cpu_index = (env->cpu_index / smp_threads) * smt
1222         + (env->cpu_index % smp_threads);
1223
1224     return 0;
1225 }
1226
1227
1228 bool kvm_arch_stop_on_emulation_error(CPUPPCState *env)
1229 {
1230     return true;
1231 }
1232
1233 int kvm_arch_on_sigbus_vcpu(CPUPPCState *env, int code, void *addr)
1234 {
1235     return 1;
1236 }
1237
/* SIGBUS hook.  Both arguments are ignored and 1 is always returned
 * (presumably meaning "not handled" -- confirm with the generic KVM
 * caller). */
int kvm_arch_on_sigbus(int code, void *addr)
{
    (void)code;
    (void)addr;

    return 1;
}