target-ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include <dirent.h>
  18 #include <sys/types.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/mman.h>
  21 #include <sys/vfs.h>
  22
  23 #include <linux/kvm.h>
  24
  25 #include "qemu-common.h"
  26 #include "qemu-timer.h"
  27 #include "sysemu.h"
  28 #include "kvm.h"
  29 #include "kvm_ppc.h"
  30 #include "cpu.h"
  31 #include "cpus.h"
  32 #include "device_tree.h"
  33 #include "hw/sysbus.h"
  34 #include "hw/spapr.h"
  35
  36 #include "hw/sysbus.h"
  37 #include "hw/spapr.h"
  38 #include "hw/spapr_vio.h"
  39
  40 //#define DEBUG_KVM
  41
  42 #ifdef DEBUG_KVM
  43 #define dprintf(fmt, ...) \
  44     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  45 #else
  46 #define dprintf(fmt, ...) \
  47     do { } while (0)
  48 #endif
  49
  50 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  51
  52 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  53     KVM_CAP_LAST_INFO
  54 };
  55
  56 static int cap_interrupt_unset = false;
  57 static int cap_interrupt_level = false;
  58 static int cap_segstate;
  59 static int cap_booke_sregs;
  60 static int cap_ppc_smt;
  61 static int cap_ppc_rma;
  62 static int cap_spapr_tce;
  63 static int cap_hior;
  64
  65 /* XXX We have a race condition where we actually have a level triggered
  66  *     interrupt, but the infrastructure can't expose that yet, so the guest
  67  *     takes but ignores it, goes to sleep and never gets notified that there's
  68  *     still an interrupt pending.
  69  *
  70  *     As a quick workaround, let's just wake up again 20 ms after we injected
  71  *     an interrupt. That way we can assure that we're always reinjecting
  72  *     interrupts in case the guest swallowed them.
  73  */
  74 static QEMUTimer *idle_timer;
  75
  76 static void kvm_kick_env(void *env)
  77 {
  78     qemu_cpu_kick(env);
  79 }
  80
  81 int kvm_arch_init(KVMState *s)
  82 {
  83     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
  84     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
  85     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
  86     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
  87     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
  88     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
  89     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
  90     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
  91
  92     if (!cap_interrupt_level) {
  93         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
  94                         "VM to stall at times!\n");
  95     }
  96
  97     return 0;
  98 }
  99
 100 static int kvm_arch_sync_sregs(CPUPPCState *cenv)
 101 {
 102     struct kvm_sregs sregs;
 103     int ret;
 104
 105     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 106         /* What we're really trying to say is "if we're on BookE, we use
 107            the native PVR for now". This is the only sane way to check
 108            it though, so we potentially confuse users that they can run
 109            BookE guests on BookS. Let's hope nobody dares enough :) */
 110         return 0;
 111     } else {
 112         if (!cap_segstate) {
 113             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 114             return -ENOSYS;
 115         }
 116     }
 117
 118     ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
 119     if (ret) {
 120         return ret;
 121     }
 122
 123     sregs.pvr = cenv->spr[SPR_PVR];
 124     return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
 125 }
 126
 127 /* Set up a shared TLB array with KVM */
 128 static int kvm_booke206_tlb_init(CPUPPCState *env)
 129 {
 130     struct kvm_book3e_206_tlb_params params = {};
 131     struct kvm_config_tlb cfg = {};
 132     struct kvm_enable_cap encap = {};
 133     unsigned int entries = 0;
 134     int ret, i;
 135
 136     if (!kvm_enabled() ||
 137         !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
 138         return 0;
 139     }
 140
 141     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 142
 143     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 144         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 145         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 146         entries += params.tlb_sizes[i];
 147     }
 148
 149     assert(entries == env->nb_tlb);
 150     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 151
 152     env->tlb_dirty = true;
 153
 154     cfg.array = (uintptr_t)env->tlb.tlbm;
 155     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 156     cfg.params = (uintptr_t)&params;
 157     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 158
 159     encap.cap = KVM_CAP_SW_TLB;
 160     encap.args[0] = (uintptr_t)&cfg;
 161
 162     ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &encap);
 163     if (ret < 0) {
 164         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 165                 __func__, strerror(-ret));
 166         return ret;
 167     }
 168
 169     env->kvm_sw_tlb = true;
 170     return 0;
 171 }
 172
 173
 174 #if defined(TARGET_PPC64)
 175 static void kvm_get_fallback_smmu_info(CPUPPCState *env,
 176                                        struct kvm_ppc_smmu_info *info)
 177 {
 178     memset(info, 0, sizeof(*info));
 179
 180     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 181      * need to "guess" what the supported page sizes are.
 182      *
 183      * For that to work we make a few assumptions:
 184      *
 185      * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 186      *   KVM which only supports 4K and 16M pages, but supports them
 187      *   regardless of the backing store characteritics. We also don't
 188      *   support 1T segments.
 189      *
 190      *   This is safe as if HV KVM ever supports that capability or PR
 191      *   KVM grows supports for more page/segment sizes, those versions
 192      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 193      *   will not hit this fallback
 194      *
 195      * - Else we are running HV KVM. This means we only support page
 196      *   sizes that fit in the backing store. Additionally we only
 197      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 198      *   P7 encodings for the SLB and hash table. Here too, we assume
 199      *   support for any newer processor will mean a kernel that
 200      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 201      *   this fallback.
 202      */
 203     if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 204         /* No flags */
 205         info->flags = 0;
 206         info->slb_size = 64;
 207
 208         /* Standard 4k base page size segment */
 209         info->sps[0].page_shift = 12;
 210         info->sps[0].slb_enc = 0;
 211         info->sps[0].enc[0].page_shift = 12;
 212         info->sps[0].enc[0].pte_enc = 0;
 213
 214         /* Standard 16M large page size segment */
 215         info->sps[1].page_shift = 24;
 216         info->sps[1].slb_enc = SLB_VSID_L;
 217         info->sps[1].enc[0].page_shift = 24;
 218         info->sps[1].enc[0].pte_enc = 0;
 219     } else {
 220         int i = 0;
 221
 222         /* HV KVM has backing store size restrictions */
 223         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 224
 225         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 226             info->flags |= KVM_PPC_1T_SEGMENTS;
 227         }
 228
 229         if (env->mmu_model == POWERPC_MMU_2_06) {
 230             info->slb_size = 32;
 231         } else {
 232             info->slb_size = 64;
 233         }
 234
 235         /* Standard 4k base page size segment */
 236         info->sps[i].page_shift = 12;
 237         info->sps[i].slb_enc = 0;
 238         info->sps[i].enc[0].page_shift = 12;
 239         info->sps[i].enc[0].pte_enc = 0;
 240         i++;
 241
 242         /* 64K on MMU 2.06 */
 243         if (env->mmu_model == POWERPC_MMU_2_06) {
 244             info->sps[i].page_shift = 16;
 245             info->sps[i].slb_enc = 0x110;
 246             info->sps[i].enc[0].page_shift = 16;
 247             info->sps[i].enc[0].pte_enc = 1;
 248             i++;
 249         }
 250
 251         /* Standard 16M large page size segment */
 252         info->sps[i].page_shift = 24;
 253         info->sps[i].slb_enc = SLB_VSID_L;
 254         info->sps[i].enc[0].page_shift = 24;
 255         info->sps[i].enc[0].pte_enc = 0;
 256     }
 257 }
 258
 259 static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
 260 {
 261     int ret;
 262
 263     if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 264         ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 265         if (ret == 0) {
 266             return;
 267         }
 268     }
 269
 270     kvm_get_fallback_smmu_info(env, info);
 271 }
 272
 273 static long getrampagesize(void)
 274 {
 275     struct statfs fs;
 276     int ret;
 277
 278     if (!mem_path) {
 279         /* guest RAM is backed by normal anonymous pages */
 280         return getpagesize();
 281     }
 282
 283     do {
 284         ret = statfs(mem_path, &fs);
 285     } while (ret != 0 && errno == EINTR);
 286
 287     if (ret != 0) {
 288         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 289                 strerror(errno));
 290         exit(1);
 291     }
 292
 293 #define HUGETLBFS_MAGIC       0x958458f6
 294
 295     if (fs.f_type != HUGETLBFS_MAGIC) {
 296         /* Explicit mempath, but it's ordinary pages */
 297         return getpagesize();
 298     }
 299
 300     /* It's hugepage, return the huge page size */
 301     return fs.f_bsize;
 302 }
 303
 304 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 305 {
 306     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 307         return true;
 308     }
 309
 310     return (1ul << shift) <= rampgsize;
 311 }
 312
 313 static void kvm_fixup_page_sizes(CPUPPCState *env)
 314 {
 315     static struct kvm_ppc_smmu_info smmu_info;
 316     static bool has_smmu_info;
 317     long rampagesize;
 318     int iq, ik, jq, jk;
 319
 320     /* We only handle page sizes for 64-bit server guests for now */
 321     if (!(env->mmu_model & POWERPC_MMU_64)) {
 322         return;
 323     }
 324
 325     /* Collect MMU info from kernel if not already */
 326     if (!has_smmu_info) {
 327         kvm_get_smmu_info(env, &smmu_info);
 328         has_smmu_info = true;
 329     }
 330
 331     rampagesize = getrampagesize();
 332
 333     /* Convert to QEMU form */
 334     memset(&env->sps, 0, sizeof(env->sps));
 335
 336     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 337         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 338         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 339
 340         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 341                                  ksps->page_shift)) {
 342             continue;
 343         }
 344         qsps->page_shift = ksps->page_shift;
 345         qsps->slb_enc = ksps->slb_enc;
 346         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 347             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 348                                      ksps->enc[jk].page_shift)) {
 349                 continue;
 350             }
 351             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 352             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 353             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 354                 break;
 355             }
 356         }
 357         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 358             break;
 359         }
 360     }
 361     env->slb_nr = smmu_info.slb_size;
 362     if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
 363         env->mmu_model |= POWERPC_MMU_1TSEG;
 364     } else {
 365         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 366     }
 367 }
 368 #else /* defined (TARGET_PPC64) */
 369
 370 static inline void kvm_fixup_page_sizes(CPUPPCState *env)
 371 {
 372 }
 373
 374 #endif /* !defined (TARGET_PPC64) */
 375
 376 int kvm_arch_init_vcpu(CPUPPCState *cenv)
 377 {
 378     int ret;
 379
 380     /* Gather server mmu info from KVM and update the CPU state */
 381     kvm_fixup_page_sizes(cenv);
 382
 383     /* Synchronize sregs with kvm */
 384     ret = kvm_arch_sync_sregs(cenv);
 385     if (ret) {
 386         return ret;
 387     }
 388
 389     idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_env, cenv);
 390
 391     /* Some targets support access to KVM's guest TLB. */
 392     switch (cenv->mmu_model) {
 393     case POWERPC_MMU_BOOKE206:
 394         ret = kvm_booke206_tlb_init(cenv);
 395         break;
 396     default:
 397         break;
 398     }
 399
 400     return ret;
 401 }
 402
 403 void kvm_arch_reset_vcpu(CPUPPCState *env)
 404 {
 405 }
 406
 407 static void kvm_sw_tlb_put(CPUPPCState *env)
 408 {
 409     struct kvm_dirty_tlb dirty_tlb;
 410     unsigned char *bitmap;
 411     int ret;
 412
 413     if (!env->kvm_sw_tlb) {
 414         return;
 415     }
 416
 417     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 418     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 419
 420     dirty_tlb.bitmap = (uintptr_t)bitmap;
 421     dirty_tlb.num_dirty = env->nb_tlb;
 422
 423     ret = kvm_vcpu_ioctl(env, KVM_DIRTY_TLB, &dirty_tlb);
 424     if (ret) {
 425         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 426                 __func__, strerror(-ret));
 427     }
 428
 429     g_free(bitmap);
 430 }
 431
 432 int kvm_arch_put_registers(CPUPPCState *env, int level)
 433 {
 434     struct kvm_regs regs;
 435     int ret;
 436     int i;
 437
 438     ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
 439     if (ret < 0)
 440         return ret;
 441
 442     regs.ctr = env->ctr;
 443     regs.lr  = env->lr;
 444     regs.xer = env->xer;
 445     regs.msr = env->msr;
 446     regs.pc = env->nip;
 447
 448     regs.srr0 = env->spr[SPR_SRR0];
 449     regs.srr1 = env->spr[SPR_SRR1];
 450
 451     regs.sprg0 = env->spr[SPR_SPRG0];
 452     regs.sprg1 = env->spr[SPR_SPRG1];
 453     regs.sprg2 = env->spr[SPR_SPRG2];
 454     regs.sprg3 = env->spr[SPR_SPRG3];
 455     regs.sprg4 = env->spr[SPR_SPRG4];
 456     regs.sprg5 = env->spr[SPR_SPRG5];
 457     regs.sprg6 = env->spr[SPR_SPRG6];
 458     regs.sprg7 = env->spr[SPR_SPRG7];
 459
 460     regs.pid = env->spr[SPR_BOOKE_PID];
 461
 462     for (i = 0;i < 32; i++)
 463         regs.gpr[i] = env->gpr[i];
 464
 465     ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
 466     if (ret < 0)
 467         return ret;
 468
 469     if (env->tlb_dirty) {
 470         kvm_sw_tlb_put(env);
 471         env->tlb_dirty = false;
 472     }
 473
 474     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 475         struct kvm_sregs sregs;
 476
 477         sregs.pvr = env->spr[SPR_PVR];
 478
 479         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 480
 481         /* Sync SLB */
 482 #ifdef TARGET_PPC64
 483         for (i = 0; i < 64; i++) {
 484             sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 485             sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 486         }
 487 #endif
 488
 489         /* Sync SRs */
 490         for (i = 0; i < 16; i++) {
 491             sregs.u.s.ppc32.sr[i] = env->sr[i];
 492         }
 493
 494         /* Sync BATs */
 495         for (i = 0; i < 8; i++) {
 496             sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[1][i] << 32)
 497                 | env->DBAT[0][i];
 498             sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[1][i] << 32)
 499                 | env->IBAT[0][i];
 500         }
 501
 502         ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
 503         if (ret) {
 504             return ret;
 505         }
 506     }
 507
 508     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 509         uint64_t hior = env->spr[SPR_HIOR];
 510         struct kvm_one_reg reg = {
 511             .id = KVM_REG_PPC_HIOR,
 512             .addr = (uintptr_t) &hior,
 513         };
 514
 515         ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
 516         if (ret) {
 517             return ret;
 518         }
 519     }
 520
 521     return ret;
 522 }
 523
 524 int kvm_arch_get_registers(CPUPPCState *env)
 525 {
 526     struct kvm_regs regs;
 527     struct kvm_sregs sregs;
 528     uint32_t cr;
 529     int i, ret;
 530
 531     ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
 532     if (ret < 0)
 533         return ret;
 534
 535     cr = regs.cr;
 536     for (i = 7; i >= 0; i--) {
 537         env->crf[i] = cr & 15;
 538         cr >>= 4;
 539     }
 540
 541     env->ctr = regs.ctr;
 542     env->lr = regs.lr;
 543     env->xer = regs.xer;
 544     env->msr = regs.msr;
 545     env->nip = regs.pc;
 546
 547     env->spr[SPR_SRR0] = regs.srr0;
 548     env->spr[SPR_SRR1] = regs.srr1;
 549
 550     env->spr[SPR_SPRG0] = regs.sprg0;
 551     env->spr[SPR_SPRG1] = regs.sprg1;
 552     env->spr[SPR_SPRG2] = regs.sprg2;
 553     env->spr[SPR_SPRG3] = regs.sprg3;
 554     env->spr[SPR_SPRG4] = regs.sprg4;
 555     env->spr[SPR_SPRG5] = regs.sprg5;
 556     env->spr[SPR_SPRG6] = regs.sprg6;
 557     env->spr[SPR_SPRG7] = regs.sprg7;
 558
 559     env->spr[SPR_BOOKE_PID] = regs.pid;
 560
 561     for (i = 0;i < 32; i++)
 562         env->gpr[i] = regs.gpr[i];
 563
 564     if (cap_booke_sregs) {
 565         ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
 566         if (ret < 0) {
 567             return ret;
 568         }
 569
 570         if (sregs.u.e.features & KVM_SREGS_E_BASE) {
 571             env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
 572             env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
 573             env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
 574             env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
 575             env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
 576             env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
 577             env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
 578             env->spr[SPR_DECR] = sregs.u.e.dec;
 579             env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
 580             env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
 581             env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
 582         }
 583
 584         if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
 585             env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
 586             env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
 587             env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
 588             env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
 589             env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
 590         }
 591
 592         if (sregs.u.e.features & KVM_SREGS_E_64) {
 593             env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
 594         }
 595
 596         if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
 597             env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
 598         }
 599
 600         if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
 601             env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
 602             env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
 603             env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
 604             env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
 605             env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
 606             env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
 607             env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
 608             env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
 609             env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
 610             env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
 611             env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
 612             env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
 613             env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
 614             env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
 615             env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
 616             env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
 617
 618             if (sregs.u.e.features & KVM_SREGS_E_SPE) {
 619                 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
 620                 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
 621                 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
 622             }
 623
 624             if (sregs.u.e.features & KVM_SREGS_E_PM) {
 625                 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
 626             }
 627
 628             if (sregs.u.e.features & KVM_SREGS_E_PC) {
 629                 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
 630                 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
 631             }
 632         }
 633
 634         if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
 635             env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
 636             env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
 637             env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
 638             env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
 639             env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
 640             env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
 641             env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
 642             env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
 643             env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
 644             env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
 645         }
 646
 647         if (sregs.u.e.features & KVM_SREGS_EXP) {
 648             env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
 649         }
 650
 651         if (sregs.u.e.features & KVM_SREGS_E_PD) {
 652             env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
 653             env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
 654         }
 655
 656         if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
 657             env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
 658             env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
 659             env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
 660
 661             if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
 662                 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
 663                 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
 664             }
 665         }
 666     }
 667
 668     if (cap_segstate) {
 669         ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
 670         if (ret < 0) {
 671             return ret;
 672         }
 673
 674         ppc_store_sdr1(env, sregs.u.s.sdr1);
 675
 676         /* Sync SLB */
 677 #ifdef TARGET_PPC64
 678         for (i = 0; i < 64; i++) {
 679             ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
 680                                sregs.u.s.ppc64.slb[i].slbv);
 681         }
 682 #endif
 683
 684         /* Sync SRs */
 685         for (i = 0; i < 16; i++) {
 686             env->sr[i] = sregs.u.s.ppc32.sr[i];
 687         }
 688
 689         /* Sync BATs */
 690         for (i = 0; i < 8; i++) {
 691             env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
 692             env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
 693             env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
 694             env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
 695         }
 696     }
 697
 698     return 0;
 699 }
 700
 701 int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
 702 {
 703     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
 704
 705     if (irq != PPC_INTERRUPT_EXT) {
 706         return 0;
 707     }
 708
 709     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
 710         return 0;
 711     }
 712
 713     kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);
 714
 715     return 0;
 716 }
 717
 718 #if defined(TARGET_PPCEMB)
 719 #define PPC_INPUT_INT PPC40x_INPUT_INT
 720 #elif defined(TARGET_PPC64)
 721 #define PPC_INPUT_INT PPC970_INPUT_INT
 722 #else
 723 #define PPC_INPUT_INT PPC6xx_INPUT_INT
 724 #endif
 725
 726 void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
 727 {
 728     int r;
 729     unsigned irq;
 730
 731     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
 732      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
 733     if (!cap_interrupt_level &&
 734         run->ready_for_interrupt_injection &&
 735         (env->interrupt_request & CPU_INTERRUPT_HARD) &&
 736         (env->irq_input_state & (1<<PPC_INPUT_INT)))
 737     {
 738         /* For now KVM disregards the 'irq' argument. However, in the
 739          * future KVM could cache it in-kernel to avoid a heavyweight exit
 740          * when reading the UIC.
 741          */
 742         irq = KVM_INTERRUPT_SET;
 743
 744         dprintf("injected interrupt %d\n", irq);
 745         r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
 746         if (r < 0)
 747             printf("cpu %d fail inject %x\n", env->cpu_index, irq);
 748
 749         /* Always wake up soon in case the interrupt was level based */
 750         qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
 751                        (get_ticks_per_sec() / 50));
 752     }
 753
 754     /* We don't know if there are more interrupts pending after this. However,
 755      * the guest will return to userspace in the course of handling this one
 756      * anyways, so we will get a chance to deliver the rest. */
 757 }
 758
 759 void kvm_arch_post_run(CPUPPCState *env, struct kvm_run *run)
 760 {
 761 }
 762
 763 int kvm_arch_process_async_events(CPUPPCState *env)
 764 {
 765     return env->halted;
 766 }
 767
 768 static int kvmppc_handle_halt(CPUPPCState *env)
 769 {
 770     if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
 771         env->halted = 1;
 772         env->exception_index = EXCP_HLT;
 773     }
 774
 775     return 0;
 776 }
 777
 778 /* map dcr access to existing qemu dcr emulation */
 779 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
 780 {
 781     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
 782         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
 783
 784     return 0;
 785 }
 786
 787 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
 788 {
 789     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
 790         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
 791
 792     return 0;
 793 }
 794
 795 int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run)
 796 {
 797     int ret;
 798
 799     switch (run->exit_reason) {
 800     case KVM_EXIT_DCR:
 801         if (run->dcr.is_write) {
 802             dprintf("handle dcr write\n");
 803             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
 804         } else {
 805             dprintf("handle dcr read\n");
 806             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
 807         }
 808         break;
 809     case KVM_EXIT_HLT:
 810         dprintf("handle halt\n");
 811         ret = kvmppc_handle_halt(env);
 812         break;
 813 #ifdef CONFIG_PSERIES
 814     case KVM_EXIT_PAPR_HCALL:
 815         dprintf("handle PAPR hypercall\n");
 816         run->papr_hcall.ret = spapr_hypercall(env, run->papr_hcall.nr,
 817                                               run->papr_hcall.args);
 818         ret = 0;
 819         break;
 820 #endif
 821     default:
 822         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
 823         ret = -1;
 824         break;
 825     }
 826
 827     return ret;
 828 }
 829
 830 static int read_cpuinfo(const char *field, char *value, int len)
 831 {
 832     FILE *f;
 833     int ret = -1;
 834     int field_len = strlen(field);
 835     char line[512];
 836
 837     f = fopen("/proc/cpuinfo", "r");
 838     if (!f) {
 839         return -1;
 840     }
 841
 842     do {
 843         if(!fgets(line, sizeof(line), f)) {
 844             break;
 845         }
 846         if (!strncmp(line, field, field_len)) {
 847             strncpy(value, line, len);
 848             ret = 0;
 849             break;
 850         }
 851     } while(*line);
 852
 853     fclose(f);
 854
 855     return ret;
 856 }
 857
 858 uint32_t kvmppc_get_tbfreq(void)
 859 {
 860     char line[512];
 861     char *ns;
 862     uint32_t retval = get_ticks_per_sec();
 863
 864     if (read_cpuinfo("timebase", line, sizeof(line))) {
 865         return retval;
 866     }
 867
 868     if (!(ns = strchr(line, ':'))) {
 869         return retval;
 870     }
 871
 872     ns++;
 873
 874     retval = atoi(ns);
 875     return retval;
 876 }
 877
 878 /* Try to find a device tree node for a CPU with clock-frequency property */
 879 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
 880 {
 881     struct dirent *dirp;
 882     DIR *dp;
 883
 884     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
 885         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
 886         return -1;
 887     }
 888
 889     buf[0] = '\0';
 890     while ((dirp = readdir(dp)) != NULL) {
 891         FILE *f;
 892         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
 893                  dirp->d_name);
 894         f = fopen(buf, "r");
 895         if (f) {
 896             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
 897             fclose(f);
 898             break;
 899         }
 900         buf[0] = '\0';
 901     }
 902     closedir(dp);
 903     if (buf[0] == '\0') {
 904         printf("Unknown host!\n");
 905         return -1;
 906     }
 907
 908     return 0;
 909 }
 910
 911 /* Read a CPU node property from the host device tree that's a single
 912  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
 913  * (can't find or open the property, or doesn't understand the
 914  * format) */
 915 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
 916 {
 917     char buf[PATH_MAX];
 918     union {
 919         uint32_t v32;
 920         uint64_t v64;
 921     } u;
 922     FILE *f;
 923     int len;
 924
 925     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
 926         return -1;
 927     }
 928
 929     strncat(buf, "/", sizeof(buf) - strlen(buf));
 930     strncat(buf, propname, sizeof(buf) - strlen(buf));
 931
 932     f = fopen(buf, "rb");
 933     if (!f) {
 934         return -1;
 935     }
 936
 937     len = fread(&u, 1, sizeof(u), f);
 938     fclose(f);
 939     switch (len) {
 940     case 4:
 941         /* property is a 32-bit quantity */
 942         return be32_to_cpu(u.v32);
 943     case 8:
 944         return be64_to_cpu(u.v64);
 945     }
 946
 947     return 0;
 948 }
 949
 950 uint64_t kvmppc_get_clockfreq(void)
 951 {
 952     return kvmppc_read_int_cpu_dt("clock-frequency");
 953 }
 954
 955 uint32_t kvmppc_get_vmx(void)
 956 {
 957     return kvmppc_read_int_cpu_dt("ibm,vmx");
 958 }
 959
 960 uint32_t kvmppc_get_dfp(void)
 961 {
 962     return kvmppc_read_int_cpu_dt("ibm,dfp");
 963 }
 964
 965 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
 966 {
 967     uint32_t *hc = (uint32_t*)buf;
 968
 969     struct kvm_ppc_pvinfo pvinfo;
 970
 971     if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
 972         !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
 973         memcpy(buf, pvinfo.hcall, buf_len);
 974
 975         return 0;
 976     }
 977
 978     /*
 979      * Fallback to always fail hypercalls:
 980      *
 981      *     li r3, -1
 982      *     nop
 983      *     nop
 984      *     nop
 985      */
 986
 987     hc[0] = 0x3860ffff;
 988     hc[1] = 0x60000000;
 989     hc[2] = 0x60000000;
 990     hc[3] = 0x60000000;
 991
 992     return 0;
 993 }
 994
 995 void kvmppc_set_papr(CPUPPCState *env)
 996 {
 997     struct kvm_enable_cap cap = {};
 998     int ret;
 999
1000     cap.cap = KVM_CAP_PPC_PAPR;
1001     ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);
1002
1003     if (ret) {
1004         cpu_abort(env, "This KVM version does not support PAPR\n");
1005     }
1006 }
1007
1008 int kvmppc_smt_threads(void)
1009 {
1010     return cap_ppc_smt ? cap_ppc_smt : 1;
1011 }
1012
1013 #ifdef TARGET_PPC64
1014 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1015 {
1016     void *rma;
1017     off_t size;
1018     int fd;
1019     struct kvm_allocate_rma ret;
1020     MemoryRegion *rma_region;
1021
1022     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1023      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1024      *                      not necessary on this hardware
1025      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1026      *
1027      * FIXME: We should allow the user to force contiguous RMA
1028      * allocation in the cap_ppc_rma==1 case.
1029      */
1030     if (cap_ppc_rma < 2) {
1031         return 0;
1032     }
1033
1034     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1035     if (fd < 0) {
1036         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1037                 strerror(errno));
1038         return -1;
1039     }
1040
1041     size = MIN(ret.rma_size, 256ul << 20);
1042
1043     rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1044     if (rma == MAP_FAILED) {
1045         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1046         return -1;
1047     };
1048
1049     rma_region = g_new(MemoryRegion, 1);
1050     memory_region_init_ram_ptr(rma_region, name, size, rma);
1051     vmstate_register_ram_global(rma_region);
1052     memory_region_add_subregion(sysmem, 0, rma_region);
1053
1054     return size;
1055 }
1056
1057 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1058 {
1059     if (cap_ppc_rma >= 2) {
1060         return current_size;
1061     }
1062     return MIN(current_size,
1063                getrampagesize() << (hash_shift - 7));
1064 }
1065 #endif
1066
1067 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1068 {
1069     struct kvm_create_spapr_tce args = {
1070         .liobn = liobn,
1071         .window_size = window_size,
1072     };
1073     long len;
1074     int fd;
1075     void *table;
1076
1077     /* Must set fd to -1 so we don't try to munmap when called for
1078      * destroying the table, which the upper layers -will- do
1079      */
1080     *pfd = -1;
1081     if (!cap_spapr_tce) {
1082         return NULL;
1083     }
1084
1085     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1086     if (fd < 0) {
1087         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1088                 liobn);
1089         return NULL;
1090     }
1091
1092     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1093     /* FIXME: round this up to page size */
1094
1095     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1096     if (table == MAP_FAILED) {
1097         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1098                 liobn);
1099         close(fd);
1100         return NULL;
1101     }
1102
1103     *pfd = fd;
1104     return table;
1105 }
1106
1107 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1108 {
1109     long len;
1110
1111     if (fd < 0) {
1112         return -1;
1113     }
1114
1115     len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
1116     if ((munmap(table, len) < 0) ||
1117         (close(fd) < 0)) {
1118         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1119                 strerror(errno));
1120         /* Leak the table */
1121     }
1122
1123     return 0;
1124 }
1125
1126 int kvmppc_reset_htab(int shift_hint)
1127 {
1128     uint32_t shift = shift_hint;
1129
1130     if (!kvm_enabled()) {
1131         /* Full emulation, tell caller to allocate htab itself */
1132         return 0;
1133     }
1134     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1135         int ret;
1136         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1137         if (ret == -ENOTTY) {
1138             /* At least some versions of PR KVM advertise the
1139              * capability, but don't implement the ioctl().  Oops.
1140              * Return 0 so that we allocate the htab in qemu, as is
1141              * correct for PR. */
1142             return 0;
1143         } else if (ret < 0) {
1144             return ret;
1145         }
1146         return shift;
1147     }
1148
1149     /* We have a kernel that predates the htab reset calls.  For PR
1150      * KVM, we need to allocate the htab ourselves, for an HV KVM of
1151      * this era, it has allocated a 16MB fixed size hash table
1152      * already.  Kernels of this era have the GET_PVINFO capability
1153      * only on PR, so we use this hack to determine the right
1154      * answer */
1155     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1156         /* PR - tell caller to allocate htab */
1157         return 0;
1158     } else {
1159         /* HV - assume 16MB kernel allocated htab */
1160         return 24;
1161     }
1162 }
1163
1164 static inline uint32_t mfpvr(void)
1165 {
1166     uint32_t pvr;
1167
1168     asm ("mfpvr %0"
1169          : "=r"(pvr));
1170     return pvr;
1171 }
1172
/* Set (@on true) or clear (@on false) every bit of @flags in *@word.
 * Bits of *@word outside @flags are left untouched. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
1181
1182 const ppc_def_t *kvmppc_host_cpu_def(void)
1183 {
1184     uint32_t host_pvr = mfpvr();
1185     const ppc_def_t *base_spec;
1186     ppc_def_t *spec;
1187     uint32_t vmx = kvmppc_get_vmx();
1188     uint32_t dfp = kvmppc_get_dfp();
1189
1190     base_spec = ppc_find_by_pvr(host_pvr);
1191
1192     spec = g_malloc0(sizeof(*spec));
1193     memcpy(spec, base_spec, sizeof(*spec));
1194
1195     /* Now fix up the spec with information we can query from the host */
1196
1197     if (vmx != -1) {
1198         /* Only override when we know what the host supports */
1199         alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
1200         alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
1201     }
1202     if (dfp != -1) {
1203         /* Only override when we know what the host supports */
1204         alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
1205     }
1206
1207     return spec;
1208 }
1209
1210 int kvmppc_fixup_cpu(CPUPPCState *env)
1211 {
1212     int smt;
1213
1214     /* Adjust cpu index for SMT */
1215     smt = kvmppc_smt_threads();
1216     env->cpu_index = (env->cpu_index / smp_threads) * smt
1217         + (env->cpu_index % smp_threads);
1218
1219     return 0;
1220 }
1221
1222
1223 bool kvm_arch_stop_on_emulation_error(CPUPPCState *env)
1224 {
1225     return true;
1226 }
1227
1228 int kvm_arch_on_sigbus_vcpu(CPUPPCState *env, int code, void *addr)
1229 {
1230     return 1;
1231 }
1232
/* SIGBUS hook: does nothing and always returns 1.
 * NOTE(review): presumably 1 tells the generic KVM code the signal was
 * not handled here -- confirm against the caller. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    (void)code;
    (void)addr;

    return 1;
}