target-ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include <dirent.h>
  18 #include <sys/types.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/mman.h>
  21 #include <sys/vfs.h>
  22
  23 #include <linux/kvm.h>
  24
  25 #include "qemu-common.h"
  26 #include "qemu-timer.h"
  27 #include "sysemu.h"
  28 #include "kvm.h"
  29 #include "kvm_ppc.h"
  30 #include "cpu.h"
  31 #include "cpus.h"
  32 #include "device_tree.h"
  33 #include "hw/sysbus.h"
  34 #include "hw/spapr.h"
  35
  36 #include "hw/sysbus.h"
  37 #include "hw/spapr.h"
  38 #include "hw/spapr_vio.h"
  39
  40 //#define DEBUG_KVM
  41
  42 #ifdef DEBUG_KVM
  43 #define dprintf(fmt, ...) \
  44     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  45 #else
  46 #define dprintf(fmt, ...) \
  47     do { } while (0)
  48 #endif
  49
  50 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  51
  52 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  53     KVM_CAP_LAST_INFO
  54 };
  55
  56 static int cap_interrupt_unset = false;
  57 static int cap_interrupt_level = false;
  58 static int cap_segstate;
  59 static int cap_booke_sregs;
  60 static int cap_ppc_smt;
  61 static int cap_ppc_rma;
  62 static int cap_spapr_tce;
  63 static int cap_hior;
  64
  65 /* XXX We have a race condition where we actually have a level triggered
  66  *     interrupt, but the infrastructure can't expose that yet, so the guest
  67  *     takes but ignores it, goes to sleep and never gets notified that there's
  68  *     still an interrupt pending.
  69  *
  70  *     As a quick workaround, let's just wake up again 20 ms after we injected
  71  *     an interrupt. That way we can assure that we're always reinjecting
  72  *     interrupts in case the guest swallowed them.
  73  */
  74 static QEMUTimer *idle_timer;
  75
  76 static void kvm_kick_env(void *env)
  77 {
  78     qemu_cpu_kick(env);
  79 }
  80
  81 int kvm_arch_init(KVMState *s)
  82 {
  83     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
  84     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
  85     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
  86     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
  87     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
  88     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
  89     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
  90     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
  91
  92     if (!cap_interrupt_level) {
  93         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
  94                         "VM to stall at times!\n");
  95     }
  96
  97     return 0;
  98 }
  99
 100 static int kvm_arch_sync_sregs(CPUPPCState *cenv)
 101 {
 102     struct kvm_sregs sregs;
 103     int ret;
 104
 105     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 106         /* What we're really trying to say is "if we're on BookE, we use
 107            the native PVR for now". This is the only sane way to check
 108            it though, so we potentially confuse users that they can run
 109            BookE guests on BookS. Let's hope nobody dares enough :) */
 110         return 0;
 111     } else {
 112         if (!cap_segstate) {
 113             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 114             return -ENOSYS;
 115         }
 116     }
 117
 118     ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
 119     if (ret) {
 120         return ret;
 121     }
 122
 123     sregs.pvr = cenv->spr[SPR_PVR];
 124     return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
 125 }
 126
 127 /* Set up a shared TLB array with KVM */
 128 static int kvm_booke206_tlb_init(CPUPPCState *env)
 129 {
 130     struct kvm_book3e_206_tlb_params params = {};
 131     struct kvm_config_tlb cfg = {};
 132     struct kvm_enable_cap encap = {};
 133     unsigned int entries = 0;
 134     int ret, i;
 135
 136     if (!kvm_enabled() ||
 137         !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
 138         return 0;
 139     }
 140
 141     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 142
 143     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 144         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 145         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 146         entries += params.tlb_sizes[i];
 147     }
 148
 149     assert(entries == env->nb_tlb);
 150     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 151
 152     env->tlb_dirty = true;
 153
 154     cfg.array = (uintptr_t)env->tlb.tlbm;
 155     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 156     cfg.params = (uintptr_t)&params;
 157     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 158
 159     encap.cap = KVM_CAP_SW_TLB;
 160     encap.args[0] = (uintptr_t)&cfg;
 161
 162     ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &encap);
 163     if (ret < 0) {
 164         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 165                 __func__, strerror(-ret));
 166         return ret;
 167     }
 168
 169     env->kvm_sw_tlb = true;
 170     return 0;
 171 }
 172
 173
 174 #if defined(TARGET_PPC64)
 175 static void kvm_get_fallback_smmu_info(CPUPPCState *env,
 176                                        struct kvm_ppc_smmu_info *info)
 177 {
 178     memset(info, 0, sizeof(*info));
 179
 180     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 181      * need to "guess" what the supported page sizes are.
 182      *
 183      * For that to work we make a few assumptions:
 184      *
 185      * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 186      *   KVM which only supports 4K and 16M pages, but supports them
 187      *   regardless of the backing store characteritics. We also don't
 188      *   support 1T segments.
 189      *
 190      *   This is safe as if HV KVM ever supports that capability or PR
 191      *   KVM grows supports for more page/segment sizes, those versions
 192      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 193      *   will not hit this fallback
 194      *
 195      * - Else we are running HV KVM. This means we only support page
 196      *   sizes that fit in the backing store. Additionally we only
 197      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 198      *   P7 encodings for the SLB and hash table. Here too, we assume
 199      *   support for any newer processor will mean a kernel that
 200      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 201      *   this fallback.
 202      */
 203     if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 204         /* No flags */
 205         info->flags = 0;
 206         info->slb_size = 64;
 207
 208         /* Standard 4k base page size segment */
 209         info->sps[0].page_shift = 12;
 210         info->sps[0].slb_enc = 0;
 211         info->sps[0].enc[0].page_shift = 12;
 212         info->sps[0].enc[0].pte_enc = 0;
 213
 214         /* Standard 16M large page size segment */
 215         info->sps[1].page_shift = 24;
 216         info->sps[1].slb_enc = SLB_VSID_L;
 217         info->sps[1].enc[0].page_shift = 24;
 218         info->sps[1].enc[0].pte_enc = 0;
 219     } else {
 220         int i = 0;
 221
 222         /* HV KVM has backing store size restrictions */
 223         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 224
 225         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 226             info->flags |= KVM_PPC_1T_SEGMENTS;
 227         }
 228
 229         if (env->mmu_model == POWERPC_MMU_2_06) {
 230             info->slb_size = 32;
 231         } else {
 232             info->slb_size = 64;
 233         }
 234
 235         /* Standard 4k base page size segment */
 236         info->sps[i].page_shift = 12;
 237         info->sps[i].slb_enc = 0;
 238         info->sps[i].enc[0].page_shift = 12;
 239         info->sps[i].enc[0].pte_enc = 0;
 240         i++;
 241
 242         /* 64K on MMU 2.06 */
 243         if (env->mmu_model == POWERPC_MMU_2_06) {
 244             info->sps[i].page_shift = 16;
 245             info->sps[i].slb_enc = 0x110;
 246             info->sps[i].enc[0].page_shift = 16;
 247             info->sps[i].enc[0].pte_enc = 1;
 248             i++;
 249         }
 250
 251         /* Standard 16M large page size segment */
 252         info->sps[i].page_shift = 24;
 253         info->sps[i].slb_enc = SLB_VSID_L;
 254         info->sps[i].enc[0].page_shift = 24;
 255         info->sps[i].enc[0].pte_enc = 0;
 256     }
 257 }
 258
 259 static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
 260 {
 261     int ret;
 262
 263     if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 264         ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 265         if (ret == 0) {
 266             return;
 267         }
 268     }
 269
 270     kvm_get_fallback_smmu_info(env, info);
 271 }
 272
 273 static long getrampagesize(void)
 274 {
 275     struct statfs fs;
 276     int ret;
 277
 278     if (!mem_path) {
 279         /* guest RAM is backed by normal anonymous pages */
 280         return getpagesize();
 281     }
 282
 283     do {
 284         ret = statfs(mem_path, &fs);
 285     } while (ret != 0 && errno == EINTR);
 286
 287     if (ret != 0) {
 288         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 289                 strerror(errno));
 290         exit(1);
 291     }
 292
 293 #define HUGETLBFS_MAGIC       0x958458f6
 294
 295     if (fs.f_type != HUGETLBFS_MAGIC) {
 296         /* Explicit mempath, but it's ordinary pages */
 297         return getpagesize();
 298     }
 299
 300     /* It's hugepage, return the huge page size */
 301     return fs.f_bsize;
 302 }
 303
 304 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 305 {
 306     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 307         return true;
 308     }
 309
 310     return (1ul << shift) <= rampgsize;
 311 }
 312
 313 static void kvm_fixup_page_sizes(CPUPPCState *env)
 314 {
 315     static struct kvm_ppc_smmu_info smmu_info;
 316     static bool has_smmu_info;
 317     long rampagesize;
 318     int iq, ik, jq, jk;
 319
 320     /* We only handle page sizes for 64-bit server guests for now */
 321     if (!(env->mmu_model & POWERPC_MMU_64)) {
 322         return;
 323     }
 324
 325     /* Collect MMU info from kernel if not already */
 326     if (!has_smmu_info) {
 327         kvm_get_smmu_info(env, &smmu_info);
 328         has_smmu_info = true;
 329     }
 330
 331     rampagesize = getrampagesize();
 332
 333     /* Convert to QEMU form */
 334     memset(&env->sps, 0, sizeof(env->sps));
 335
 336     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 337         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 338         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 339
 340         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 341                                  ksps->page_shift)) {
 342             continue;
 343         }
 344         qsps->page_shift = ksps->page_shift;
 345         qsps->slb_enc = ksps->slb_enc;
 346         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 347             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 348                                      ksps->enc[jk].page_shift)) {
 349                 continue;
 350             }
 351             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 352             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 353             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 354                 break;
 355             }
 356         }
 357         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 358             break;
 359         }
 360     }
 361     env->slb_nr = smmu_info.slb_size;
 362     if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
 363         env->mmu_model |= POWERPC_MMU_1TSEG;
 364     } else {
 365         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 366     }
 367 }
 368 #else /* defined (TARGET_PPC64) */
 369
 370 static inline void kvm_fixup_page_sizes(CPUPPCState *env)
 371 {
 372 }
 373
 374 #endif /* !defined (TARGET_PPC64) */
 375
 376 int kvm_arch_init_vcpu(CPUPPCState *cenv)
 377 {
 378     int ret;
 379
 380     /* Gather server mmu info from KVM and update the CPU state */
 381     kvm_fixup_page_sizes(cenv);
 382
 383     /* Synchronize sregs with kvm */
 384     ret = kvm_arch_sync_sregs(cenv);
 385     if (ret) {
 386         return ret;
 387     }
 388
 389     idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_env, cenv);
 390
 391     /* Some targets support access to KVM's guest TLB. */
 392     switch (cenv->mmu_model) {
 393     case POWERPC_MMU_BOOKE206:
 394         ret = kvm_booke206_tlb_init(cenv);
 395         break;
 396     default:
 397         break;
 398     }
 399
 400     return ret;
 401 }
 402
 403 void kvm_arch_reset_vcpu(CPUPPCState *env)
 404 {
 405 }
 406
 407 static void kvm_sw_tlb_put(CPUPPCState *env)
 408 {
 409     struct kvm_dirty_tlb dirty_tlb;
 410     unsigned char *bitmap;
 411     int ret;
 412
 413     if (!env->kvm_sw_tlb) {
 414         return;
 415     }
 416
 417     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 418     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 419
 420     dirty_tlb.bitmap = (uintptr_t)bitmap;
 421     dirty_tlb.num_dirty = env->nb_tlb;
 422
 423     ret = kvm_vcpu_ioctl(env, KVM_DIRTY_TLB, &dirty_tlb);
 424     if (ret) {
 425         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 426                 __func__, strerror(-ret));
 427     }
 428
 429     g_free(bitmap);
 430 }
 431
 432 int kvm_arch_put_registers(CPUPPCState *env, int level)
 433 {
 434     struct kvm_regs regs;
 435     int ret;
 436     int i;
 437
 438     ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
 439     if (ret < 0)
 440         return ret;
 441
 442     regs.ctr = env->ctr;
 443     regs.lr  = env->lr;
 444     regs.xer = env->xer;
 445     regs.msr = env->msr;
 446     regs.pc = env->nip;
 447
 448     regs.srr0 = env->spr[SPR_SRR0];
 449     regs.srr1 = env->spr[SPR_SRR1];
 450
 451     regs.sprg0 = env->spr[SPR_SPRG0];
 452     regs.sprg1 = env->spr[SPR_SPRG1];
 453     regs.sprg2 = env->spr[SPR_SPRG2];
 454     regs.sprg3 = env->spr[SPR_SPRG3];
 455     regs.sprg4 = env->spr[SPR_SPRG4];
 456     regs.sprg5 = env->spr[SPR_SPRG5];
 457     regs.sprg6 = env->spr[SPR_SPRG6];
 458     regs.sprg7 = env->spr[SPR_SPRG7];
 459
 460     regs.pid = env->spr[SPR_BOOKE_PID];
 461
 462     for (i = 0;i < 32; i++)
 463         regs.gpr[i] = env->gpr[i];
 464
 465     ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
 466     if (ret < 0)
 467         return ret;
 468
 469     if (env->tlb_dirty) {
 470         kvm_sw_tlb_put(env);
 471         env->tlb_dirty = false;
 472     }
 473
 474     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 475         struct kvm_sregs sregs;
 476
 477         sregs.pvr = env->spr[SPR_PVR];
 478
 479         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 480
 481         /* Sync SLB */
 482 #ifdef TARGET_PPC64
 483         for (i = 0; i < 64; i++) {
 484             sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 485             sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 486         }
 487 #endif
 488
 489         /* Sync SRs */
 490         for (i = 0; i < 16; i++) {
 491             sregs.u.s.ppc32.sr[i] = env->sr[i];
 492         }
 493
 494         /* Sync BATs */
 495         for (i = 0; i < 8; i++) {
 496             /* Beware. We have to swap upper and lower bits here */
 497             sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 498                 | env->DBAT[1][i];
 499             sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 500                 | env->IBAT[1][i];
 501         }
 502
 503         ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
 504         if (ret) {
 505             return ret;
 506         }
 507     }
 508
 509     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 510         uint64_t hior = env->spr[SPR_HIOR];
 511         struct kvm_one_reg reg = {
 512             .id = KVM_REG_PPC_HIOR,
 513             .addr = (uintptr_t) &hior,
 514         };
 515
 516         ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
 517         if (ret) {
 518             return ret;
 519         }
 520     }
 521
 522     return ret;
 523 }
 524
 525 int kvm_arch_get_registers(CPUPPCState *env)
 526 {
 527     struct kvm_regs regs;
 528     struct kvm_sregs sregs;
 529     uint32_t cr;
 530     int i, ret;
 531
 532     ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
 533     if (ret < 0)
 534         return ret;
 535
 536     cr = regs.cr;
 537     for (i = 7; i >= 0; i--) {
 538         env->crf[i] = cr & 15;
 539         cr >>= 4;
 540     }
 541
 542     env->ctr = regs.ctr;
 543     env->lr = regs.lr;
 544     env->xer = regs.xer;
 545     env->msr = regs.msr;
 546     env->nip = regs.pc;
 547
 548     env->spr[SPR_SRR0] = regs.srr0;
 549     env->spr[SPR_SRR1] = regs.srr1;
 550
 551     env->spr[SPR_SPRG0] = regs.sprg0;
 552     env->spr[SPR_SPRG1] = regs.sprg1;
 553     env->spr[SPR_SPRG2] = regs.sprg2;
 554     env->spr[SPR_SPRG3] = regs.sprg3;
 555     env->spr[SPR_SPRG4] = regs.sprg4;
 556     env->spr[SPR_SPRG5] = regs.sprg5;
 557     env->spr[SPR_SPRG6] = regs.sprg6;
 558     env->spr[SPR_SPRG7] = regs.sprg7;
 559
 560     env->spr[SPR_BOOKE_PID] = regs.pid;
 561
 562     for (i = 0;i < 32; i++)
 563         env->gpr[i] = regs.gpr[i];
 564
 565     if (cap_booke_sregs) {
 566         ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
 567         if (ret < 0) {
 568             return ret;
 569         }
 570
 571         if (sregs.u.e.features & KVM_SREGS_E_BASE) {
 572             env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
 573             env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
 574             env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
 575             env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
 576             env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
 577             env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
 578             env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
 579             env->spr[SPR_DECR] = sregs.u.e.dec;
 580             env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
 581             env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
 582             env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
 583         }
 584
 585         if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
 586             env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
 587             env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
 588             env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
 589             env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
 590             env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
 591         }
 592
 593         if (sregs.u.e.features & KVM_SREGS_E_64) {
 594             env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
 595         }
 596
 597         if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
 598             env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
 599         }
 600
 601         if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
 602             env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
 603             env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
 604             env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
 605             env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
 606             env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
 607             env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
 608             env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
 609             env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
 610             env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
 611             env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
 612             env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
 613             env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
 614             env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
 615             env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
 616             env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
 617             env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
 618
 619             if (sregs.u.e.features & KVM_SREGS_E_SPE) {
 620                 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
 621                 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
 622                 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
 623             }
 624
 625             if (sregs.u.e.features & KVM_SREGS_E_PM) {
 626                 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
 627             }
 628
 629             if (sregs.u.e.features & KVM_SREGS_E_PC) {
 630                 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
 631                 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
 632             }
 633         }
 634
 635         if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
 636             env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
 637             env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
 638             env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
 639             env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
 640             env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
 641             env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
 642             env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
 643             env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
 644             env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
 645             env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
 646         }
 647
 648         if (sregs.u.e.features & KVM_SREGS_EXP) {
 649             env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
 650         }
 651
 652         if (sregs.u.e.features & KVM_SREGS_E_PD) {
 653             env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
 654             env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
 655         }
 656
 657         if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
 658             env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
 659             env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
 660             env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
 661
 662             if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
 663                 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
 664                 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
 665             }
 666         }
 667     }
 668
 669     if (cap_segstate) {
 670         ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
 671         if (ret < 0) {
 672             return ret;
 673         }
 674
 675         ppc_store_sdr1(env, sregs.u.s.sdr1);
 676
 677         /* Sync SLB */
 678 #ifdef TARGET_PPC64
 679         for (i = 0; i < 64; i++) {
 680             ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
 681                                sregs.u.s.ppc64.slb[i].slbv);
 682         }
 683 #endif
 684
 685         /* Sync SRs */
 686         for (i = 0; i < 16; i++) {
 687             env->sr[i] = sregs.u.s.ppc32.sr[i];
 688         }
 689
 690         /* Sync BATs */
 691         for (i = 0; i < 8; i++) {
 692             env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
 693             env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
 694             env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
 695             env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
 696         }
 697     }
 698
 699     return 0;
 700 }
 701
 702 int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
 703 {
 704     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
 705
 706     if (irq != PPC_INTERRUPT_EXT) {
 707         return 0;
 708     }
 709
 710     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
 711         return 0;
 712     }
 713
 714     kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);
 715
 716     return 0;
 717 }
 718
 719 #if defined(TARGET_PPCEMB)
 720 #define PPC_INPUT_INT PPC40x_INPUT_INT
 721 #elif defined(TARGET_PPC64)
 722 #define PPC_INPUT_INT PPC970_INPUT_INT
 723 #else
 724 #define PPC_INPUT_INT PPC6xx_INPUT_INT
 725 #endif
 726
 727 void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
 728 {
 729     int r;
 730     unsigned irq;
 731
 732     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
 733      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
 734     if (!cap_interrupt_level &&
 735         run->ready_for_interrupt_injection &&
 736         (env->interrupt_request & CPU_INTERRUPT_HARD) &&
 737         (env->irq_input_state & (1<<PPC_INPUT_INT)))
 738     {
 739         /* For now KVM disregards the 'irq' argument. However, in the
 740          * future KVM could cache it in-kernel to avoid a heavyweight exit
 741          * when reading the UIC.
 742          */
 743         irq = KVM_INTERRUPT_SET;
 744
 745         dprintf("injected interrupt %d\n", irq);
 746         r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
 747         if (r < 0)
 748             printf("cpu %d fail inject %x\n", env->cpu_index, irq);
 749
 750         /* Always wake up soon in case the interrupt was level based */
 751         qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
 752                        (get_ticks_per_sec() / 50));
 753     }
 754
 755     /* We don't know if there are more interrupts pending after this. However,
 756      * the guest will return to userspace in the course of handling this one
 757      * anyways, so we will get a chance to deliver the rest. */
 758 }
 759
 760 void kvm_arch_post_run(CPUPPCState *env, struct kvm_run *run)
 761 {
 762 }
 763
 764 int kvm_arch_process_async_events(CPUPPCState *env)
 765 {
 766     return env->halted;
 767 }
 768
 769 static int kvmppc_handle_halt(CPUPPCState *env)
 770 {
 771     if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
 772         env->halted = 1;
 773         env->exception_index = EXCP_HLT;
 774     }
 775
 776     return 0;
 777 }
 778
 779 /* map dcr access to existing qemu dcr emulation */
 780 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
 781 {
 782     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
 783         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
 784
 785     return 0;
 786 }
 787
 788 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
 789 {
 790     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
 791         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
 792
 793     return 0;
 794 }
 795
 796 int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run)
 797 {
 798     int ret;
 799
 800     switch (run->exit_reason) {
 801     case KVM_EXIT_DCR:
 802         if (run->dcr.is_write) {
 803             dprintf("handle dcr write\n");
 804             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
 805         } else {
 806             dprintf("handle dcr read\n");
 807             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
 808         }
 809         break;
 810     case KVM_EXIT_HLT:
 811         dprintf("handle halt\n");
 812         ret = kvmppc_handle_halt(env);
 813         break;
 814 #ifdef CONFIG_PSERIES
 815     case KVM_EXIT_PAPR_HCALL:
 816         dprintf("handle PAPR hypercall\n");
 817         run->papr_hcall.ret = spapr_hypercall(env, run->papr_hcall.nr,
 818                                               run->papr_hcall.args);
 819         ret = 0;
 820         break;
 821 #endif
 822     default:
 823         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
 824         ret = -1;
 825         break;
 826     }
 827
 828     return ret;
 829 }
 830
 831 static int read_cpuinfo(const char *field, char *value, int len)
 832 {
 833     FILE *f;
 834     int ret = -1;
 835     int field_len = strlen(field);
 836     char line[512];
 837
 838     f = fopen("/proc/cpuinfo", "r");
 839     if (!f) {
 840         return -1;
 841     }
 842
 843     do {
 844         if(!fgets(line, sizeof(line), f)) {
 845             break;
 846         }
 847         if (!strncmp(line, field, field_len)) {
 848             strncpy(value, line, len);
 849             ret = 0;
 850             break;
 851         }
 852     } while(*line);
 853
 854     fclose(f);
 855
 856     return ret;
 857 }
 858
 859 uint32_t kvmppc_get_tbfreq(void)
 860 {
 861     char line[512];
 862     char *ns;
 863     uint32_t retval = get_ticks_per_sec();
 864
 865     if (read_cpuinfo("timebase", line, sizeof(line))) {
 866         return retval;
 867     }
 868
 869     if (!(ns = strchr(line, ':'))) {
 870         return retval;
 871     }
 872
 873     ns++;
 874
 875     retval = atoi(ns);
 876     return retval;
 877 }
 878
 879 /* Try to find a device tree node for a CPU with clock-frequency property */
 880 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
 881 {
 882     struct dirent *dirp;
 883     DIR *dp;
 884
 885     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
 886         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
 887         return -1;
 888     }
 889
 890     buf[0] = '\0';
 891     while ((dirp = readdir(dp)) != NULL) {
 892         FILE *f;
 893         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
 894                  dirp->d_name);
 895         f = fopen(buf, "r");
 896         if (f) {
 897             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
 898             fclose(f);
 899             break;
 900         }
 901         buf[0] = '\0';
 902     }
 903     closedir(dp);
 904     if (buf[0] == '\0') {
 905         printf("Unknown host!\n");
 906         return -1;
 907     }
 908
 909     return 0;
 910 }
 911
 912 /* Read a CPU node property from the host device tree that's a single
 913  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
 914  * (can't find or open the property, or doesn't understand the
 915  * format) */
 916 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
 917 {
 918     char buf[PATH_MAX];
 919     union {
 920         uint32_t v32;
 921         uint64_t v64;
 922     } u;
 923     FILE *f;
 924     int len;
 925
 926     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
 927         return -1;
 928     }
 929
 930     strncat(buf, "/", sizeof(buf) - strlen(buf));
 931     strncat(buf, propname, sizeof(buf) - strlen(buf));
 932
 933     f = fopen(buf, "rb");
 934     if (!f) {
 935         return -1;
 936     }
 937
 938     len = fread(&u, 1, sizeof(u), f);
 939     fclose(f);
 940     switch (len) {
 941     case 4:
 942         /* property is a 32-bit quantity */
 943         return be32_to_cpu(u.v32);
 944     case 8:
 945         return be64_to_cpu(u.v64);
 946     }
 947
 948     return 0;
 949 }
 950
 951 uint64_t kvmppc_get_clockfreq(void)
 952 {
 953     return kvmppc_read_int_cpu_dt("clock-frequency");
 954 }
 955
 956 uint32_t kvmppc_get_vmx(void)
 957 {
 958     return kvmppc_read_int_cpu_dt("ibm,vmx");
 959 }
 960
 961 uint32_t kvmppc_get_dfp(void)
 962 {
 963     return kvmppc_read_int_cpu_dt("ibm,dfp");
 964 }
 965
 966 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
 967 {
 968     uint32_t *hc = (uint32_t*)buf;
 969
 970     struct kvm_ppc_pvinfo pvinfo;
 971
 972     if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
 973         !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
 974         memcpy(buf, pvinfo.hcall, buf_len);
 975
 976         return 0;
 977     }
 978
 979     /*
 980      * Fallback to always fail hypercalls:
 981      *
 982      *     li r3, -1
 983      *     nop
 984      *     nop
 985      *     nop
 986      */
 987
 988     hc[0] = 0x3860ffff;
 989     hc[1] = 0x60000000;
 990     hc[2] = 0x60000000;
 991     hc[3] = 0x60000000;
 992
 993     return 0;
 994 }
 995
 996 void kvmppc_set_papr(CPUPPCState *env)
 997 {
 998     struct kvm_enable_cap cap = {};
 999     int ret;
1000
1001     cap.cap = KVM_CAP_PPC_PAPR;
1002     ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);
1003
1004     if (ret) {
1005         cpu_abort(env, "This KVM version does not support PAPR\n");
1006     }
1007 }
1008
1009 int kvmppc_smt_threads(void)
1010 {
1011     return cap_ppc_smt ? cap_ppc_smt : 1;
1012 }
1013
1014 #ifdef TARGET_PPC64
1015 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1016 {
1017     void *rma;
1018     off_t size;
1019     int fd;
1020     struct kvm_allocate_rma ret;
1021     MemoryRegion *rma_region;
1022
1023     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1024      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1025      *                      not necessary on this hardware
1026      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1027      *
1028      * FIXME: We should allow the user to force contiguous RMA
1029      * allocation in the cap_ppc_rma==1 case.
1030      */
1031     if (cap_ppc_rma < 2) {
1032         return 0;
1033     }
1034
1035     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1036     if (fd < 0) {
1037         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1038                 strerror(errno));
1039         return -1;
1040     }
1041
1042     size = MIN(ret.rma_size, 256ul << 20);
1043
1044     rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1045     if (rma == MAP_FAILED) {
1046         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1047         return -1;
1048     };
1049
1050     rma_region = g_new(MemoryRegion, 1);
1051     memory_region_init_ram_ptr(rma_region, name, size, rma);
1052     vmstate_register_ram_global(rma_region);
1053     memory_region_add_subregion(sysmem, 0, rma_region);
1054
1055     return size;
1056 }
1057
1058 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1059 {
1060     if (cap_ppc_rma >= 2) {
1061         return current_size;
1062     }
1063     return MIN(current_size,
1064                getrampagesize() << (hash_shift - 7));
1065 }
1066 #endif
1067
1068 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1069 {
1070     struct kvm_create_spapr_tce args = {
1071         .liobn = liobn,
1072         .window_size = window_size,
1073     };
1074     long len;
1075     int fd;
1076     void *table;
1077
1078     /* Must set fd to -1 so we don't try to munmap when called for
1079      * destroying the table, which the upper layers -will- do
1080      */
1081     *pfd = -1;
1082     if (!cap_spapr_tce) {
1083         return NULL;
1084     }
1085
1086     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1087     if (fd < 0) {
1088         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1089                 liobn);
1090         return NULL;
1091     }
1092
1093     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1094     /* FIXME: round this up to page size */
1095
1096     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1097     if (table == MAP_FAILED) {
1098         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1099                 liobn);
1100         close(fd);
1101         return NULL;
1102     }
1103
1104     *pfd = fd;
1105     return table;
1106 }
1107
1108 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1109 {
1110     long len;
1111
1112     if (fd < 0) {
1113         return -1;
1114     }
1115
1116     len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
1117     if ((munmap(table, len) < 0) ||
1118         (close(fd) < 0)) {
1119         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1120                 strerror(errno));
1121         /* Leak the table */
1122     }
1123
1124     return 0;
1125 }
1126
1127 int kvmppc_reset_htab(int shift_hint)
1128 {
1129     uint32_t shift = shift_hint;
1130
1131     if (!kvm_enabled()) {
1132         /* Full emulation, tell caller to allocate htab itself */
1133         return 0;
1134     }
1135     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1136         int ret;
1137         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1138         if (ret == -ENOTTY) {
1139             /* At least some versions of PR KVM advertise the
1140              * capability, but don't implement the ioctl().  Oops.
1141              * Return 0 so that we allocate the htab in qemu, as is
1142              * correct for PR. */
1143             return 0;
1144         } else if (ret < 0) {
1145             return ret;
1146         }
1147         return shift;
1148     }
1149
1150     /* We have a kernel that predates the htab reset calls.  For PR
1151      * KVM, we need to allocate the htab ourselves, for an HV KVM of
1152      * this era, it has allocated a 16MB fixed size hash table
1153      * already.  Kernels of this era have the GET_PVINFO capability
1154      * only on PR, so we use this hack to determine the right
1155      * answer */
1156     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1157         /* PR - tell caller to allocate htab */
1158         return 0;
1159     } else {
1160         /* HV - assume 16MB kernel allocated htab */
1161         return 24;
1162     }
1163 }
1164
1165 static inline uint32_t mfpvr(void)
1166 {
1167     uint32_t pvr;
1168
1169     asm ("mfpvr %0"
1170          : "=r"(pvr));
1171     return pvr;
1172 }
1173
/* Set (@on == true) or clear (@on == false) every bit of @flags in
 * the flag word pointed to by @word; all other bits are left alone. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
1182
1183 const ppc_def_t *kvmppc_host_cpu_def(void)
1184 {
1185     uint32_t host_pvr = mfpvr();
1186     const ppc_def_t *base_spec;
1187     ppc_def_t *spec;
1188     uint32_t vmx = kvmppc_get_vmx();
1189     uint32_t dfp = kvmppc_get_dfp();
1190
1191     base_spec = ppc_find_by_pvr(host_pvr);
1192
1193     spec = g_malloc0(sizeof(*spec));
1194     memcpy(spec, base_spec, sizeof(*spec));
1195
1196     /* Now fix up the spec with information we can query from the host */
1197
1198     if (vmx != -1) {
1199         /* Only override when we know what the host supports */
1200         alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
1201         alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
1202     }
1203     if (dfp != -1) {
1204         /* Only override when we know what the host supports */
1205         alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
1206     }
1207
1208     return spec;
1209 }
1210
1211 int kvmppc_fixup_cpu(CPUPPCState *env)
1212 {
1213     int smt;
1214
1215     /* Adjust cpu index for SMT */
1216     smt = kvmppc_smt_threads();
1217     env->cpu_index = (env->cpu_index / smp_threads) * smt
1218         + (env->cpu_index % smp_threads);
1219
1220     return 0;
1221 }
1222
1223
1224 bool kvm_arch_stop_on_emulation_error(CPUPPCState *env)
1225 {
1226     return true;
1227 }
1228
1229 int kvm_arch_on_sigbus_vcpu(CPUPPCState *env, int code, void *addr)
1230 {
1231     return 1;
1232 }
1233
/* Unconditionally returns 1; @code and @addr are not examined.
 * NOTE(review): presumably 1 tells the generic code the SIGBUS was
 * not handled here - confirm against the caller. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    (void)code;
    (void)addr;

    return 1;
}