target/ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include "qemu/osdep.h"
  18 #include <dirent.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/vfs.h>
  21
  22 #include <linux/kvm.h>
  23
  24 #include "qemu-common.h"
  25 #include "qemu/error-report.h"
  26 #include "cpu.h"
  27 #include "cpu-models.h"
  28 #include "qemu/timer.h"
  29 #include "sysemu/sysemu.h"
  30 #include "sysemu/hw_accel.h"
  31 #include "kvm_ppc.h"
  32 #include "sysemu/cpus.h"
  33 #include "sysemu/device_tree.h"
  34 #include "mmu-hash64.h"
  35
  36 #include "hw/sysbus.h"
  37 #include "hw/ppc/spapr.h"
  38 #include "hw/ppc/spapr_vio.h"
  39 #include "hw/ppc/spapr_cpu_core.h"
  40 #include "hw/ppc/ppc.h"
  41 #include "sysemu/watchdog.h"
  42 #include "trace.h"
  43 #include "exec/gdbstub.h"
  44 #include "exec/memattrs.h"
  45 #include "exec/ram_addr.h"
  46 #include "sysemu/hostmem.h"
  47 #include "qemu/cutils.h"
  48 #include "qemu/mmap-alloc.h"
  49 #if defined(TARGET_PPC64)
  50 #include "hw/ppc/spapr_cpu_core.h"
  51 #endif
  52
/* Uncomment the next line to compile in the debug output of this file */
//#define DEBUG_KVM

/*
 * DPRINTF(): printf-style debug message written to stderr.  Expands to an
 * empty statement unless DEBUG_KVM is defined, in which case the arguments
 * are not evaluated at all.
 */
#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
  62
/* Host device tree directory that holds the CPU nodes.
 * NOTE(review): not referenced in this part of the file - presumably used
 * by the host CPU probing code further down; confirm. */
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

/* Capability table exported to the generic KVM code.  It holds only the
 * KVM_CAP_LAST_INFO entry (presumably the end-of-list marker), i.e. no
 * PowerPC capability is listed as required here. */
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};
  68
/*
 * Cached KVM capability state.  Most of these are filled in once by
 * kvm_arch_init() from kvm_check_extension(); exceptions:
 *  - cap_spapr_vfio is simply set to false there,
 *  - cap_papr is not probed there (see the note in kvm_arch_init()),
 *  - cap_htm may additionally be forced on in kvm_arch_init_vcpu().
 */
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;
static int cap_htm;             /* Hardware transactional memory support */

/* Value of the KVM_REG_PPC_DEBUG_INST register, read in
 * kvm_arch_init_vcpu() */
static uint32_t debug_inst_opcode;
  88
/* XXX We have a race condition where we actually have a level-triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes it but ignores it, goes to sleep and never gets notified that
 *     there's still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we
 *     injected an interrupt. That way we can ensure that we're always
 *     reinjecting interrupts in case the guest swallowed them.
 *
 *     The timer is created in kvm_arch_init_vcpu() with kvm_kick_cpu() as
 *     its callback.
 */
static QEMUTimer *idle_timer;
  99
 100 static void kvm_kick_cpu(void *opaque)
 101 {
 102     PowerPCCPU *cpu = opaque;
 103
 104     qemu_cpu_kick(CPU(cpu));
 105 }
 106
 107 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
 108  * should only be used for fallback tests - generally we should use
 109  * explicit capabilities for the features we want, rather than
 110  * assuming what is/isn't available depending on the KVM variant. */
 111 static bool kvmppc_is_pr(KVMState *ks)
 112 {
 113     /* Assume KVM-PR if the GET_PVINFO capability is available */
 114     return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
 115 }
 116
/* Forward declaration: called from kvm_arch_init() below */
static int kvm_ppc_register_host_cpu_type(void);
 118
 119 int kvm_arch_init(MachineState *ms, KVMState *s)
 120 {
 121     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 122     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 123     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 124     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 125     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 126     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 127     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 128     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 129     cap_spapr_vfio = false;
 130     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 131     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 132     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 133     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 134     /* Note: we don't set cap_papr here, because this capability is
 135      * only activated after this by kvmppc_set_papr() */
 136     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 137     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 138     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
 139
 140     if (!cap_interrupt_level) {
 141         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 142                         "VM to stall at times!\n");
 143     }
 144
 145     kvm_ppc_register_host_cpu_type();
 146
 147     return 0;
 148 }
 149
/* Architecture hook for irqchip creation: nothing is done here for
 * PowerPC, neither argument is used and 0 is always returned. */
int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
{
    return 0;
}
 154
 155 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 156 {
 157     CPUPPCState *cenv = &cpu->env;
 158     CPUState *cs = CPU(cpu);
 159     struct kvm_sregs sregs;
 160     int ret;
 161
 162     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 163         /* What we're really trying to say is "if we're on BookE, we use
 164            the native PVR for now". This is the only sane way to check
 165            it though, so we potentially confuse users that they can run
 166            BookE guests on BookS. Let's hope nobody dares enough :) */
 167         return 0;
 168     } else {
 169         if (!cap_segstate) {
 170             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 171             return -ENOSYS;
 172         }
 173     }
 174
 175     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 176     if (ret) {
 177         return ret;
 178     }
 179
 180     sregs.pvr = cenv->spr[SPR_PVR];
 181     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 182 }
 183
 184 /* Set up a shared TLB array with KVM */
 185 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 186 {
 187     CPUPPCState *env = &cpu->env;
 188     CPUState *cs = CPU(cpu);
 189     struct kvm_book3e_206_tlb_params params = {};
 190     struct kvm_config_tlb cfg = {};
 191     unsigned int entries = 0;
 192     int ret, i;
 193
 194     if (!kvm_enabled() ||
 195         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 196         return 0;
 197     }
 198
 199     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 200
 201     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 202         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 203         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 204         entries += params.tlb_sizes[i];
 205     }
 206
 207     assert(entries == env->nb_tlb);
 208     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 209
 210     env->tlb_dirty = true;
 211
 212     cfg.array = (uintptr_t)env->tlb.tlbm;
 213     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 214     cfg.params = (uintptr_t)&params;
 215     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 216
 217     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 218     if (ret < 0) {
 219         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 220                 __func__, strerror(-ret));
 221         return ret;
 222     }
 223
 224     env->kvm_sw_tlb = true;
 225     return 0;
 226 }
 227
 228
 229 #if defined(TARGET_PPC64)
 230 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 231                                        struct kvm_ppc_smmu_info *info)
 232 {
 233     CPUPPCState *env = &cpu->env;
 234     CPUState *cs = CPU(cpu);
 235
 236     memset(info, 0, sizeof(*info));
 237
 238     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 239      * need to "guess" what the supported page sizes are.
 240      *
 241      * For that to work we make a few assumptions:
 242      *
 243      * - Check whether we are running "PR" KVM which only supports 4K
 244      *   and 16M pages, but supports them regardless of the backing
 245      *   store characteritics. We also don't support 1T segments.
 246      *
 247      *   This is safe as if HV KVM ever supports that capability or PR
 248      *   KVM grows supports for more page/segment sizes, those versions
 249      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 250      *   will not hit this fallback
 251      *
 252      * - Else we are running HV KVM. This means we only support page
 253      *   sizes that fit in the backing store. Additionally we only
 254      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 255      *   P7 encodings for the SLB and hash table. Here too, we assume
 256      *   support for any newer processor will mean a kernel that
 257      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 258      *   this fallback.
 259      */
 260     if (kvmppc_is_pr(cs->kvm_state)) {
 261         /* No flags */
 262         info->flags = 0;
 263         info->slb_size = 64;
 264
 265         /* Standard 4k base page size segment */
 266         info->sps[0].page_shift = 12;
 267         info->sps[0].slb_enc = 0;
 268         info->sps[0].enc[0].page_shift = 12;
 269         info->sps[0].enc[0].pte_enc = 0;
 270
 271         /* Standard 16M large page size segment */
 272         info->sps[1].page_shift = 24;
 273         info->sps[1].slb_enc = SLB_VSID_L;
 274         info->sps[1].enc[0].page_shift = 24;
 275         info->sps[1].enc[0].pte_enc = 0;
 276     } else {
 277         int i = 0;
 278
 279         /* HV KVM has backing store size restrictions */
 280         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 281
 282         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 283             info->flags |= KVM_PPC_1T_SEGMENTS;
 284         }
 285
 286         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
 287            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
 288             info->slb_size = 32;
 289         } else {
 290             info->slb_size = 64;
 291         }
 292
 293         /* Standard 4k base page size segment */
 294         info->sps[i].page_shift = 12;
 295         info->sps[i].slb_enc = 0;
 296         info->sps[i].enc[0].page_shift = 12;
 297         info->sps[i].enc[0].pte_enc = 0;
 298         i++;
 299
 300         /* 64K on MMU 2.06 and later */
 301         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
 302             POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
 303             info->sps[i].page_shift = 16;
 304             info->sps[i].slb_enc = 0x110;
 305             info->sps[i].enc[0].page_shift = 16;
 306             info->sps[i].enc[0].pte_enc = 1;
 307             i++;
 308         }
 309
 310         /* Standard 16M large page size segment */
 311         info->sps[i].page_shift = 24;
 312         info->sps[i].slb_enc = SLB_VSID_L;
 313         info->sps[i].enc[0].page_shift = 24;
 314         info->sps[i].enc[0].pte_enc = 0;
 315     }
 316 }
 317
 318 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 319 {
 320     CPUState *cs = CPU(cpu);
 321     int ret;
 322
 323     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 324         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 325         if (ret == 0) {
 326             return;
 327         }
 328     }
 329
 330     kvm_get_fallback_smmu_info(cpu, info);
 331 }
 332
 333 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 334 {
 335     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 336         return true;
 337     }
 338
 339     return (1ul << shift) <= rampgsize;
 340 }
 341
/* Page size limit used by kvm_valid_page_size() checks; set lazily from
 * qemu_getrampagesize() on the first kvm_fixup_page_sizes() call and
 * compared against in kvmppc_is_mem_backend_page_size_ok(). */
static long max_cpu_page_size;
 343
/*
 * Restrict the page/segment sizes, SLB size and MMU model flags of @cpu
 * to what KVM reports (see kvm_get_smmu_info()).
 *
 * The KVM description is fetched only once and kept in a function-local
 * static, so every later call reuses the values obtained for the first
 * CPU.  Does nothing for CPUs without POWERPC_MMU_64 in their MMU model.
 */
static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    /* i*: segment index, j*: encoding index; *k: KVM side, *q: QEMU side */
    int iq, ik, jq, jk;
    bool has_64k_pages = false;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    /* Likewise, look up the RAM page size only on first use */
    if (!max_cpu_page_size) {
        max_cpu_page_size = qemu_getrampagesize();
    }

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    /* If we have HV KVM, we need to forbid CI large pages if our
     * host page size is smaller than 64K.
     */
    if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
        env->ci_large_pages = getpagesize() >= 0x10000;
    }

    /*
     * XXX This loop should be an entry wide AND of the capabilities that
     *     the selected CPU has with the capabilities that KVM supports.
     */
    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        /* Skip segment sizes rejected by kvm_valid_page_size(); iq is
         * not advanced, so the QEMU table stays densely packed */
        if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        /* Same filtering for the page encodings of this segment size */
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            if (ksps->enc[jk].page_shift == 16) {
                has_64k_pages = true;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            /* Stop once the QEMU side encoding array is full */
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        /* Stop once the QEMU side segment array is full */
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    /* Drop MMU model features that KVM did not report */
    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
    if (!has_64k_pages) {
        env->mmu_model &= ~POWERPC_MMU_64K;
    }
}
 417
 418 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
 419 {
 420     Object *mem_obj = object_resolve_path(obj_path, NULL);
 421     char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
 422     long pagesize;
 423
 424     if (mempath) {
 425         pagesize = qemu_mempath_getpagesize(mempath);
 426     } else {
 427         pagesize = getpagesize();
 428     }
 429
 430     return pagesize >= max_cpu_page_size;
 431 }
 432
 433 #else /* defined (TARGET_PPC64) */
 434
/* Variant for builds without TARGET_PPC64: nothing to fix up */
static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}
 438
/* Variant for builds without TARGET_PPC64: every memory backend is
 * accepted, @obj_path is not looked at */
bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
{
    return true;
}
 443
 444 #endif /* !defined (TARGET_PPC64) */
 445
 446 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 447 {
 448     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 449 }
 450
/* e500 supports 2 h/w breakpoints and 2 watchpoints.
 * book3s supports only 1 watchpoint, so an array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

/* One hardware debug point: the guest address it covers and its type.
 * NOTE(review): the values stored in 'type' are not visible in this part
 * of the file. */
static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];
 461
/* NOTE(review): not used in this part of the file - presumably describes
 * the watchpoint reported back on a debug exit; confirm. */
static CPUWatchpoint hw_watchpoint;

/* By default no hardware breakpoints or watchpoints are supported; the
 * limits are raised in kvmppc_hw_debug_points_init().  The nb_* counters
 * presumably track how many are currently in use (not visible here). */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;
 469
 470 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 471 {
 472     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 473         max_hw_breakpoint = 2;
 474         max_hw_watchpoint = 2;
 475     }
 476
 477     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 478         fprintf(stderr, "Error initializing h/w breakpoints\n");
 479         return;
 480     }
 481 }
 482
 483 int kvm_arch_init_vcpu(CPUState *cs)
 484 {
 485     PowerPCCPU *cpu = POWERPC_CPU(cs);
 486     CPUPPCState *cenv = &cpu->env;
 487     int ret;
 488
 489     /* Gather server mmu info from KVM and update the CPU state */
 490     kvm_fixup_page_sizes(cpu);
 491
 492     /* Synchronize sregs with kvm */
 493     ret = kvm_arch_sync_sregs(cpu);
 494     if (ret) {
 495         if (ret == -EINVAL) {
 496             error_report("Register sync failed... If you're using kvm-hv.ko,"
 497                          " only \"-cpu host\" is possible");
 498         }
 499         return ret;
 500     }
 501
 502     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 503
 504     switch (cenv->mmu_model) {
 505     case POWERPC_MMU_BOOKE206:
 506         /* This target supports access to KVM's guest TLB */
 507         ret = kvm_booke206_tlb_init(cpu);
 508         break;
 509     case POWERPC_MMU_2_07:
 510         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
 511             /* KVM-HV has transactional memory on POWER8 also without the
 512              * KVM_CAP_PPC_HTM extension, so enable it here instead. */
 513             cap_htm = true;
 514         }
 515         break;
 516     default:
 517         break;
 518     }
 519
 520     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 521     kvmppc_hw_debug_points_init(cenv);
 522
 523     return ret;
 524 }
 525
 526 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 527 {
 528     CPUPPCState *env = &cpu->env;
 529     CPUState *cs = CPU(cpu);
 530     struct kvm_dirty_tlb dirty_tlb;
 531     unsigned char *bitmap;
 532     int ret;
 533
 534     if (!env->kvm_sw_tlb) {
 535         return;
 536     }
 537
 538     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 539     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 540
 541     dirty_tlb.bitmap = (uintptr_t)bitmap;
 542     dirty_tlb.num_dirty = env->nb_tlb;
 543
 544     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 545     if (ret) {
 546         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 547                 __func__, strerror(-ret));
 548     }
 549
 550     g_free(bitmap);
 551 }
 552
 553 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 554 {
 555     PowerPCCPU *cpu = POWERPC_CPU(cs);
 556     CPUPPCState *env = &cpu->env;
 557     union {
 558         uint32_t u32;
 559         uint64_t u64;
 560     } val;
 561     struct kvm_one_reg reg = {
 562         .id = id,
 563         .addr = (uintptr_t) &val,
 564     };
 565     int ret;
 566
 567     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 568     if (ret != 0) {
 569         trace_kvm_failed_spr_get(spr, strerror(errno));
 570     } else {
 571         switch (id & KVM_REG_SIZE_MASK) {
 572         case KVM_REG_SIZE_U32:
 573             env->spr[spr] = val.u32;
 574             break;
 575
 576         case KVM_REG_SIZE_U64:
 577             env->spr[spr] = val.u64;
 578             break;
 579
 580         default:
 581             /* Don't handle this size yet */
 582             abort();
 583         }
 584     }
 585 }
 586
 587 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 588 {
 589     PowerPCCPU *cpu = POWERPC_CPU(cs);
 590     CPUPPCState *env = &cpu->env;
 591     union {
 592         uint32_t u32;
 593         uint64_t u64;
 594     } val;
 595     struct kvm_one_reg reg = {
 596         .id = id,
 597         .addr = (uintptr_t) &val,
 598     };
 599     int ret;
 600
 601     switch (id & KVM_REG_SIZE_MASK) {
 602     case KVM_REG_SIZE_U32:
 603         val.u32 = env->spr[spr];
 604         break;
 605
 606     case KVM_REG_SIZE_U64:
 607         val.u64 = env->spr[spr];
 608         break;
 609
 610     default:
 611         /* Don't handle this size yet */
 612         abort();
 613     }
 614
 615     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 616     if (ret != 0) {
 617         trace_kvm_failed_spr_set(spr, strerror(errno));
 618     }
 619 }
 620
 621 static int kvm_put_fp(CPUState *cs)
 622 {
 623     PowerPCCPU *cpu = POWERPC_CPU(cs);
 624     CPUPPCState *env = &cpu->env;
 625     struct kvm_one_reg reg;
 626     int i;
 627     int ret;
 628
 629     if (env->insns_flags & PPC_FLOAT) {
 630         uint64_t fpscr = env->fpscr;
 631         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 632
 633         reg.id = KVM_REG_PPC_FPSCR;
 634         reg.addr = (uintptr_t)&fpscr;
 635         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 636         if (ret < 0) {
 637             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 638             return ret;
 639         }
 640
 641         for (i = 0; i < 32; i++) {
 642             uint64_t vsr[2];
 643
 644 #ifdef HOST_WORDS_BIGENDIAN
 645             vsr[0] = float64_val(env->fpr[i]);
 646             vsr[1] = env->vsr[i];
 647 #else
 648             vsr[0] = env->vsr[i];
 649             vsr[1] = float64_val(env->fpr[i]);
 650 #endif
 651             reg.addr = (uintptr_t) &vsr;
 652             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 653
 654             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 655             if (ret < 0) {
 656                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 657                         i, strerror(errno));
 658                 return ret;
 659             }
 660         }
 661     }
 662
 663     if (env->insns_flags & PPC_ALTIVEC) {
 664         reg.id = KVM_REG_PPC_VSCR;
 665         reg.addr = (uintptr_t)&env->vscr;
 666         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 667         if (ret < 0) {
 668             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 669             return ret;
 670         }
 671
 672         for (i = 0; i < 32; i++) {
 673             reg.id = KVM_REG_PPC_VR(i);
 674             reg.addr = (uintptr_t)&env->avr[i];
 675             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 676             if (ret < 0) {
 677                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 678                 return ret;
 679             }
 680         }
 681     }
 682
 683     return 0;
 684 }
 685
 686 static int kvm_get_fp(CPUState *cs)
 687 {
 688     PowerPCCPU *cpu = POWERPC_CPU(cs);
 689     CPUPPCState *env = &cpu->env;
 690     struct kvm_one_reg reg;
 691     int i;
 692     int ret;
 693
 694     if (env->insns_flags & PPC_FLOAT) {
 695         uint64_t fpscr;
 696         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 697
 698         reg.id = KVM_REG_PPC_FPSCR;
 699         reg.addr = (uintptr_t)&fpscr;
 700         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 701         if (ret < 0) {
 702             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 703             return ret;
 704         } else {
 705             env->fpscr = fpscr;
 706         }
 707
 708         for (i = 0; i < 32; i++) {
 709             uint64_t vsr[2];
 710
 711             reg.addr = (uintptr_t) &vsr;
 712             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 713
 714             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 715             if (ret < 0) {
 716                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 717                         vsx ? "VSR" : "FPR", i, strerror(errno));
 718                 return ret;
 719             } else {
 720 #ifdef HOST_WORDS_BIGENDIAN
 721                 env->fpr[i] = vsr[0];
 722                 if (vsx) {
 723                     env->vsr[i] = vsr[1];
 724                 }
 725 #else
 726                 env->fpr[i] = vsr[1];
 727                 if (vsx) {
 728                     env->vsr[i] = vsr[0];
 729                 }
 730 #endif
 731             }
 732         }
 733     }
 734
 735     if (env->insns_flags & PPC_ALTIVEC) {
 736         reg.id = KVM_REG_PPC_VSCR;
 737         reg.addr = (uintptr_t)&env->vscr;
 738         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 739         if (ret < 0) {
 740             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 741             return ret;
 742         }
 743
 744         for (i = 0; i < 32; i++) {
 745             reg.id = KVM_REG_PPC_VR(i);
 746             reg.addr = (uintptr_t)&env->avr[i];
 747             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 748             if (ret < 0) {
 749                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 750                         i, strerror(errno));
 751                 return ret;
 752             }
 753         }
 754     }
 755
 756     return 0;
 757 }
 758
 759 #if defined(TARGET_PPC64)
 760 static int kvm_get_vpa(CPUState *cs)
 761 {
 762     PowerPCCPU *cpu = POWERPC_CPU(cs);
 763     CPUPPCState *env = &cpu->env;
 764     struct kvm_one_reg reg;
 765     int ret;
 766
 767     reg.id = KVM_REG_PPC_VPA_ADDR;
 768     reg.addr = (uintptr_t)&env->vpa_addr;
 769     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 770     if (ret < 0) {
 771         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 772         return ret;
 773     }
 774
 775     assert((uintptr_t)&env->slb_shadow_size
 776            == ((uintptr_t)&env->slb_shadow_addr + 8));
 777     reg.id = KVM_REG_PPC_VPA_SLB;
 778     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 779     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 780     if (ret < 0) {
 781         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 782                 strerror(errno));
 783         return ret;
 784     }
 785
 786     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 787     reg.id = KVM_REG_PPC_VPA_DTL;
 788     reg.addr = (uintptr_t)&env->dtl_addr;
 789     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 790     if (ret < 0) {
 791         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 792                 strerror(errno));
 793         return ret;
 794     }
 795
 796     return 0;
 797 }
 798
 799 static int kvm_put_vpa(CPUState *cs)
 800 {
 801     PowerPCCPU *cpu = POWERPC_CPU(cs);
 802     CPUPPCState *env = &cpu->env;
 803     struct kvm_one_reg reg;
 804     int ret;
 805
 806     /* SLB shadow or DTL can't be registered unless a master VPA is
 807      * registered.  That means when restoring state, if a VPA *is*
 808      * registered, we need to set that up first.  If not, we need to
 809      * deregister the others before deregistering the master VPA */
 810     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 811
 812     if (env->vpa_addr) {
 813         reg.id = KVM_REG_PPC_VPA_ADDR;
 814         reg.addr = (uintptr_t)&env->vpa_addr;
 815         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 816         if (ret < 0) {
 817             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 818             return ret;
 819         }
 820     }
 821
 822     assert((uintptr_t)&env->slb_shadow_size
 823            == ((uintptr_t)&env->slb_shadow_addr + 8));
 824     reg.id = KVM_REG_PPC_VPA_SLB;
 825     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 826     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 827     if (ret < 0) {
 828         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 829         return ret;
 830     }
 831
 832     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 833     reg.id = KVM_REG_PPC_VPA_DTL;
 834     reg.addr = (uintptr_t)&env->dtl_addr;
 835     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 836     if (ret < 0) {
 837         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 838                 strerror(errno));
 839         return ret;
 840     }
 841
 842     if (!env->vpa_addr) {
 843         reg.id = KVM_REG_PPC_VPA_ADDR;
 844         reg.addr = (uintptr_t)&env->vpa_addr;
 845         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 846         if (ret < 0) {
 847             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 848             return ret;
 849         }
 850     }
 851
 852     return 0;
 853 }
 854 #endif /* TARGET_PPC64 */
 855
 856 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 857 {
 858     CPUPPCState *env = &cpu->env;
 859     struct kvm_sregs sregs;
 860     int i;
 861
 862     sregs.pvr = env->spr[SPR_PVR];
 863
 864     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 865
 866     /* Sync SLB */
 867 #ifdef TARGET_PPC64
 868     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 869         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 870         if (env->slb[i].esid & SLB_ESID_V) {
 871             sregs.u.s.ppc64.slb[i].slbe |= i;
 872         }
 873         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 874     }
 875 #endif
 876
 877     /* Sync SRs */
 878     for (i = 0; i < 16; i++) {
 879         sregs.u.s.ppc32.sr[i] = env->sr[i];
 880     }
 881
 882     /* Sync BATs */
 883     for (i = 0; i < 8; i++) {
 884         /* Beware. We have to swap upper and lower bits here */
 885         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 886             | env->DBAT[1][i];
 887         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 888             | env->IBAT[1][i];
 889     }
 890
 891     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 892 }
 893
 894 int kvm_arch_put_registers(CPUState *cs, int level)
 895 {
 896     PowerPCCPU *cpu = POWERPC_CPU(cs);
 897     CPUPPCState *env = &cpu->env;
 898     struct kvm_regs regs;
 899     int ret;
 900     int i;
 901
 902     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 903     if (ret < 0) {
 904         return ret;
 905     }
 906
 907     regs.ctr = env->ctr;
 908     regs.lr  = env->lr;
 909     regs.xer = cpu_read_xer(env);
 910     regs.msr = env->msr;
 911     regs.pc = env->nip;
 912
 913     regs.srr0 = env->spr[SPR_SRR0];
 914     regs.srr1 = env->spr[SPR_SRR1];
 915
 916     regs.sprg0 = env->spr[SPR_SPRG0];
 917     regs.sprg1 = env->spr[SPR_SPRG1];
 918     regs.sprg2 = env->spr[SPR_SPRG2];
 919     regs.sprg3 = env->spr[SPR_SPRG3];
 920     regs.sprg4 = env->spr[SPR_SPRG4];
 921     regs.sprg5 = env->spr[SPR_SPRG5];
 922     regs.sprg6 = env->spr[SPR_SPRG6];
 923     regs.sprg7 = env->spr[SPR_SPRG7];
 924
 925     regs.pid = env->spr[SPR_BOOKE_PID];
 926
 927     for (i = 0;i < 32; i++)
 928         regs.gpr[i] = env->gpr[i];
 929
 930     regs.cr = 0;
 931     for (i = 0; i < 8; i++) {
 932         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 933     }
 934
 935     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 936     if (ret < 0)
 937         return ret;
 938
 939     kvm_put_fp(cs);
 940
 941     if (env->tlb_dirty) {
 942         kvm_sw_tlb_put(cpu);
 943         env->tlb_dirty = false;
 944     }
 945
 946     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 947         ret = kvmppc_put_books_sregs(cpu);
 948         if (ret < 0) {
 949             return ret;
 950         }
 951     }
 952
 953     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 954         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 955     }
 956
 957     if (cap_one_reg) {
 958         int i;
 959
 960         /* We deliberately ignore errors here, for kernels which have
 961          * the ONE_REG calls, but don't support the specific
 962          * registers, there's a reasonable chance things will still
 963          * work, at least until we try to migrate. */
 964         for (i = 0; i < 1024; i++) {
 965             uint64_t id = env->spr_cb[i].one_reg_id;
 966
 967             if (id != 0) {
 968                 kvm_put_one_spr(cs, id, i);
 969             }
 970         }
 971
 972 #ifdef TARGET_PPC64
 973         if (msr_ts) {
 974             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
 975                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
 976             }
 977             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
 978                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
 979             }
 980             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
 981             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
 982             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
 983             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
 984             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
 985             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
 986             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
 987             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
 988             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
 989             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
 990         }
 991
 992         if (cap_papr) {
 993             if (kvm_put_vpa(cs) < 0) {
 994                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
 995             }
 996         }
 997
 998         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
 999 #endif /* TARGET_PPC64 */
1000     }
1001
1002     return ret;
1003 }
1004
1005 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1006 {
1007      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1008 }
1009
1010 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1011 {
1012     CPUPPCState *env = &cpu->env;
1013     struct kvm_sregs sregs;
1014     int ret;
1015
1016     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1017     if (ret < 0) {
1018         return ret;
1019     }
1020
1021     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1022         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1023         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1024         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1025         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1026         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1027         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1028         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1029         env->spr[SPR_DECR] = sregs.u.e.dec;
1030         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1031         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1032         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1033     }
1034
1035     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1036         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1037         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1038         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1039         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1040         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1041     }
1042
1043     if (sregs.u.e.features & KVM_SREGS_E_64) {
1044         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1045     }
1046
1047     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1048         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1049     }
1050
1051     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1052         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1053         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1054         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1055         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1056         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1057         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1058         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1059         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1060         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1061         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1062         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1063         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1064         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1065         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1066         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1067         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1068         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1069         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1070         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1071         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1072         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1073         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1074         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1075         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1076         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1077         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1078         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1079         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1080         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1081         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1082         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1083         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1084
1085         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1086             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1087             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1088             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1089             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1090             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1091             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1092         }
1093
1094         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1095             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1096             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1097         }
1098
1099         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1100             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1101             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1102             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1103             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1104         }
1105     }
1106
1107     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1108         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1109         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1110         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1111         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1112         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1113         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1114         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1115         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1116         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1117         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1118     }
1119
1120     if (sregs.u.e.features & KVM_SREGS_EXP) {
1121         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1122     }
1123
1124     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1125         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1126         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1127     }
1128
1129     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1130         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1131         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1132         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1133
1134         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1135             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1136             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1137         }
1138     }
1139
1140     return 0;
1141 }
1142
1143 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1144 {
1145     CPUPPCState *env = &cpu->env;
1146     struct kvm_sregs sregs;
1147     int ret;
1148     int i;
1149
1150     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1151     if (ret < 0) {
1152         return ret;
1153     }
1154
1155     if (!cpu->vhyp) {
1156         ppc_store_sdr1(env, sregs.u.s.sdr1);
1157     }
1158
1159     /* Sync SLB */
1160 #ifdef TARGET_PPC64
1161     /*
1162      * The packed SLB array we get from KVM_GET_SREGS only contains
1163      * information about valid entries. So we flush our internal copy
1164      * to get rid of stale ones, then put all valid SLB entries back
1165      * in.
1166      */
1167     memset(env->slb, 0, sizeof(env->slb));
1168     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1169         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1170         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1171         /*
1172          * Only restore valid entries
1173          */
1174         if (rb & SLB_ESID_V) {
1175             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1176         }
1177     }
1178 #endif
1179
1180     /* Sync SRs */
1181     for (i = 0; i < 16; i++) {
1182         env->sr[i] = sregs.u.s.ppc32.sr[i];
1183     }
1184
1185     /* Sync BATs */
1186     for (i = 0; i < 8; i++) {
1187         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1188         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1189         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1190         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1191     }
1192
1193     return 0;
1194 }
1195
1196 int kvm_arch_get_registers(CPUState *cs)
1197 {
1198     PowerPCCPU *cpu = POWERPC_CPU(cs);
1199     CPUPPCState *env = &cpu->env;
1200     struct kvm_regs regs;
1201     uint32_t cr;
1202     int i, ret;
1203
1204     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1205     if (ret < 0)
1206         return ret;
1207
1208     cr = regs.cr;
1209     for (i = 7; i >= 0; i--) {
1210         env->crf[i] = cr & 15;
1211         cr >>= 4;
1212     }
1213
1214     env->ctr = regs.ctr;
1215     env->lr = regs.lr;
1216     cpu_write_xer(env, regs.xer);
1217     env->msr = regs.msr;
1218     env->nip = regs.pc;
1219
1220     env->spr[SPR_SRR0] = regs.srr0;
1221     env->spr[SPR_SRR1] = regs.srr1;
1222
1223     env->spr[SPR_SPRG0] = regs.sprg0;
1224     env->spr[SPR_SPRG1] = regs.sprg1;
1225     env->spr[SPR_SPRG2] = regs.sprg2;
1226     env->spr[SPR_SPRG3] = regs.sprg3;
1227     env->spr[SPR_SPRG4] = regs.sprg4;
1228     env->spr[SPR_SPRG5] = regs.sprg5;
1229     env->spr[SPR_SPRG6] = regs.sprg6;
1230     env->spr[SPR_SPRG7] = regs.sprg7;
1231
1232     env->spr[SPR_BOOKE_PID] = regs.pid;
1233
1234     for (i = 0;i < 32; i++)
1235         env->gpr[i] = regs.gpr[i];
1236
1237     kvm_get_fp(cs);
1238
1239     if (cap_booke_sregs) {
1240         ret = kvmppc_get_booke_sregs(cpu);
1241         if (ret < 0) {
1242             return ret;
1243         }
1244     }
1245
1246     if (cap_segstate) {
1247         ret = kvmppc_get_books_sregs(cpu);
1248         if (ret < 0) {
1249             return ret;
1250         }
1251     }
1252
1253     if (cap_hior) {
1254         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1255     }
1256
1257     if (cap_one_reg) {
1258         int i;
1259
1260         /* We deliberately ignore errors here, for kernels which have
1261          * the ONE_REG calls, but don't support the specific
1262          * registers, there's a reasonable chance things will still
1263          * work, at least until we try to migrate. */
1264         for (i = 0; i < 1024; i++) {
1265             uint64_t id = env->spr_cb[i].one_reg_id;
1266
1267             if (id != 0) {
1268                 kvm_get_one_spr(cs, id, i);
1269             }
1270         }
1271
1272 #ifdef TARGET_PPC64
1273         if (msr_ts) {
1274             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1275                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1276             }
1277             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1278                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1279             }
1280             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1281             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1282             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1283             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1284             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1285             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1286             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1287             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1288             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1289             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1290         }
1291
1292         if (cap_papr) {
1293             if (kvm_get_vpa(cs) < 0) {
1294                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1295             }
1296         }
1297
1298         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1299 #endif
1300     }
1301
1302     return 0;
1303 }
1304
1305 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1306 {
1307     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1308
1309     if (irq != PPC_INTERRUPT_EXT) {
1310         return 0;
1311     }
1312
1313     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1314         return 0;
1315     }
1316
1317     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1318
1319     return 0;
1320 }
1321
1322 #if defined(TARGET_PPCEMB)
1323 #define PPC_INPUT_INT PPC40x_INPUT_INT
1324 #elif defined(TARGET_PPC64)
1325 #define PPC_INPUT_INT PPC970_INPUT_INT
1326 #else
1327 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1328 #endif
1329
1330 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1331 {
1332     PowerPCCPU *cpu = POWERPC_CPU(cs);
1333     CPUPPCState *env = &cpu->env;
1334     int r;
1335     unsigned irq;
1336
1337     qemu_mutex_lock_iothread();
1338
1339     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1340      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1341     if (!cap_interrupt_level &&
1342         run->ready_for_interrupt_injection &&
1343         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1344         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1345     {
1346         /* For now KVM disregards the 'irq' argument. However, in the
1347          * future KVM could cache it in-kernel to avoid a heavyweight exit
1348          * when reading the UIC.
1349          */
1350         irq = KVM_INTERRUPT_SET;
1351
1352         DPRINTF("injected interrupt %d\n", irq);
1353         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1354         if (r < 0) {
1355             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1356         }
1357
1358         /* Always wake up soon in case the interrupt was level based */
1359         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1360                        (NANOSECONDS_PER_SECOND / 50));
1361     }
1362
1363     /* We don't know if there are more interrupts pending after this. However,
1364      * the guest will return to userspace in the course of handling this one
1365      * anyways, so we will get a chance to deliver the rest. */
1366
1367     qemu_mutex_unlock_iothread();
1368 }
1369
1370 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1371 {
1372     return MEMTXATTRS_UNSPECIFIED;
1373 }
1374
1375 int kvm_arch_process_async_events(CPUState *cs)
1376 {
1377     return cs->halted;
1378 }
1379
1380 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1381 {
1382     CPUState *cs = CPU(cpu);
1383     CPUPPCState *env = &cpu->env;
1384
1385     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1386         cs->halted = 1;
1387         cs->exception_index = EXCP_HLT;
1388     }
1389
1390     return 0;
1391 }
1392
1393 /* map dcr access to existing qemu dcr emulation */
1394 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1395 {
1396     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1397         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1398
1399     return 0;
1400 }
1401
1402 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1403 {
1404     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1405         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1406
1407     return 0;
1408 }
1409
1410 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1411 {
1412     /* Mixed endian case is not handled */
1413     uint32_t sc = debug_inst_opcode;
1414
1415     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1416                             sizeof(sc), 0) ||
1417         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1418         return -EINVAL;
1419     }
1420
1421     return 0;
1422 }
1423
1424 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1425 {
1426     uint32_t sc;
1427
1428     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1429         sc != debug_inst_opcode ||
1430         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1431                             sizeof(sc), 1)) {
1432         return -EINVAL;
1433     }
1434
1435     return 0;
1436 }
1437
1438 static int find_hw_breakpoint(target_ulong addr, int type)
1439 {
1440     int n;
1441
1442     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1443            <= ARRAY_SIZE(hw_debug_points));
1444
1445     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1446         if (hw_debug_points[n].addr == addr &&
1447              hw_debug_points[n].type == type) {
1448             return n;
1449         }
1450     }
1451
1452     return -1;
1453 }
1454
1455 static int find_hw_watchpoint(target_ulong addr, int *flag)
1456 {
1457     int n;
1458
1459     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1460     if (n >= 0) {
1461         *flag = BP_MEM_ACCESS;
1462         return n;
1463     }
1464
1465     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1466     if (n >= 0) {
1467         *flag = BP_MEM_WRITE;
1468         return n;
1469     }
1470
1471     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1472     if (n >= 0) {
1473         *flag = BP_MEM_READ;
1474         return n;
1475     }
1476
1477     return -1;
1478 }
1479
1480 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1481                                   target_ulong len, int type)
1482 {
1483     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1484         return -ENOBUFS;
1485     }
1486
1487     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1488     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1489
1490     switch (type) {
1491     case GDB_BREAKPOINT_HW:
1492         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1493             return -ENOBUFS;
1494         }
1495
1496         if (find_hw_breakpoint(addr, type) >= 0) {
1497             return -EEXIST;
1498         }
1499
1500         nb_hw_breakpoint++;
1501         break;
1502
1503     case GDB_WATCHPOINT_WRITE:
1504     case GDB_WATCHPOINT_READ:
1505     case GDB_WATCHPOINT_ACCESS:
1506         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1507             return -ENOBUFS;
1508         }
1509
1510         if (find_hw_breakpoint(addr, type) >= 0) {
1511             return -EEXIST;
1512         }
1513
1514         nb_hw_watchpoint++;
1515         break;
1516
1517     default:
1518         return -ENOSYS;
1519     }
1520
1521     return 0;
1522 }
1523
1524 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1525                                   target_ulong len, int type)
1526 {
1527     int n;
1528
1529     n = find_hw_breakpoint(addr, type);
1530     if (n < 0) {
1531         return -ENOENT;
1532     }
1533
1534     switch (type) {
1535     case GDB_BREAKPOINT_HW:
1536         nb_hw_breakpoint--;
1537         break;
1538
1539     case GDB_WATCHPOINT_WRITE:
1540     case GDB_WATCHPOINT_READ:
1541     case GDB_WATCHPOINT_ACCESS:
1542         nb_hw_watchpoint--;
1543         break;
1544
1545     default:
1546         return -ENOSYS;
1547     }
1548     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1549
1550     return 0;
1551 }
1552
1553 void kvm_arch_remove_all_hw_breakpoints(void)
1554 {
1555     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1556 }
1557
1558 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1559 {
1560     int n;
1561
1562     /* Software Breakpoint updates */
1563     if (kvm_sw_breakpoints_active(cs)) {
1564         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1565     }
1566
1567     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1568            <= ARRAY_SIZE(hw_debug_points));
1569     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1570
1571     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1572         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1573         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1574         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1575             switch (hw_debug_points[n].type) {
1576             case GDB_BREAKPOINT_HW:
1577                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1578                 break;
1579             case GDB_WATCHPOINT_WRITE:
1580                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1581                 break;
1582             case GDB_WATCHPOINT_READ:
1583                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1584                 break;
1585             case GDB_WATCHPOINT_ACCESS:
1586                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1587                                         KVMPPC_DEBUG_WATCH_READ;
1588                 break;
1589             default:
1590                 cpu_abort(cs, "Unsupported breakpoint type\n");
1591             }
1592             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1593         }
1594     }
1595 }
1596
1597 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1598 {
1599     CPUState *cs = CPU(cpu);
1600     CPUPPCState *env = &cpu->env;
1601     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1602     int handle = 0;
1603     int n;
1604     int flag = 0;
1605
1606     if (cs->singlestep_enabled) {
1607         handle = 1;
1608     } else if (arch_info->status) {
1609         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1610             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1611                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1612                 if (n >= 0) {
1613                     handle = 1;
1614                 }
1615             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1616                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1617                 n = find_hw_watchpoint(arch_info->address,  &flag);
1618                 if (n >= 0) {
1619                     handle = 1;
1620                     cs->watchpoint_hit = &hw_watchpoint;
1621                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1622                     hw_watchpoint.flags = flag;
1623                 }
1624             }
1625         }
1626     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1627         handle = 1;
1628     } else {
1629         /* QEMU is not able to handle debug exception, so inject
1630          * program exception to guest;
1631          * Yes program exception NOT debug exception !!
1632          * When QEMU is using debug resources then debug exception must
1633          * be always set. To achieve this we set MSR_DE and also set
1634          * MSRP_DEP so guest cannot change MSR_DE.
1635          * When emulating debug resource for guest we want guest
1636          * to control MSR_DE (enable/disable debug interrupt on need).
1637          * Supporting both configurations are NOT possible.
1638          * So the result is that we cannot share debug resources
1639          * between QEMU and Guest on BOOKE architecture.
1640          * In the current design QEMU gets the priority over guest,
1641          * this means that if QEMU is using debug resources then guest
1642          * cannot use them;
1643          * For software breakpoint QEMU uses a privileged instruction;
1644          * So there cannot be any reason that we are here for guest
1645          * set debug exception, only possibility is guest executed a
1646          * privileged / illegal instruction and that's why we are
1647          * injecting a program interrupt.
1648          */
1649
1650         cpu_synchronize_state(cs);
1651         /* env->nip is PC, so increment this by 4 to use
1652          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1653          */
1654         env->nip += 4;
1655         cs->exception_index = POWERPC_EXCP_PROGRAM;
1656         env->error_code = POWERPC_EXCP_INVAL;
1657         ppc_cpu_do_interrupt(cs);
1658     }
1659
1660     return handle;
1661 }
1662
1663 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1664 {
1665     PowerPCCPU *cpu = POWERPC_CPU(cs);
1666     CPUPPCState *env = &cpu->env;
1667     int ret;
1668
1669     qemu_mutex_lock_iothread();
1670
1671     switch (run->exit_reason) {
1672     case KVM_EXIT_DCR:
1673         if (run->dcr.is_write) {
1674             DPRINTF("handle dcr write\n");
1675             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1676         } else {
1677             DPRINTF("handle dcr read\n");
1678             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1679         }
1680         break;
1681     case KVM_EXIT_HLT:
1682         DPRINTF("handle halt\n");
1683         ret = kvmppc_handle_halt(cpu);
1684         break;
1685 #if defined(TARGET_PPC64)
1686     case KVM_EXIT_PAPR_HCALL:
1687         DPRINTF("handle PAPR hypercall\n");
1688         run->papr_hcall.ret = spapr_hypercall(cpu,
1689                                               run->papr_hcall.nr,
1690                                               run->papr_hcall.args);
1691         ret = 0;
1692         break;
1693 #endif
1694     case KVM_EXIT_EPR:
1695         DPRINTF("handle epr\n");
1696         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1697         ret = 0;
1698         break;
1699     case KVM_EXIT_WATCHDOG:
1700         DPRINTF("handle watchdog expiry\n");
1701         watchdog_perform_action();
1702         ret = 0;
1703         break;
1704
1705     case KVM_EXIT_DEBUG:
1706         DPRINTF("handle debug exception\n");
1707         if (kvm_handle_debug(cpu, run)) {
1708             ret = EXCP_DEBUG;
1709             break;
1710         }
1711         /* re-enter, this exception was guest-internal */
1712         ret = 0;
1713         break;
1714
1715     default:
1716         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1717         ret = -1;
1718         break;
1719     }
1720
1721     qemu_mutex_unlock_iothread();
1722     return ret;
1723 }
1724
1725 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1726 {
1727     CPUState *cs = CPU(cpu);
1728     uint32_t bits = tsr_bits;
1729     struct kvm_one_reg reg = {
1730         .id = KVM_REG_PPC_OR_TSR,
1731         .addr = (uintptr_t) &bits,
1732     };
1733
1734     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1735 }
1736
1737 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1738 {
1739
1740     CPUState *cs = CPU(cpu);
1741     uint32_t bits = tsr_bits;
1742     struct kvm_one_reg reg = {
1743         .id = KVM_REG_PPC_CLEAR_TSR,
1744         .addr = (uintptr_t) &bits,
1745     };
1746
1747     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1748 }
1749
1750 int kvmppc_set_tcr(PowerPCCPU *cpu)
1751 {
1752     CPUState *cs = CPU(cpu);
1753     CPUPPCState *env = &cpu->env;
1754     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1755
1756     struct kvm_one_reg reg = {
1757         .id = KVM_REG_PPC_TCR,
1758         .addr = (uintptr_t) &tcr,
1759     };
1760
1761     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1762 }
1763
1764 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1765 {
1766     CPUState *cs = CPU(cpu);
1767     int ret;
1768
1769     if (!kvm_enabled()) {
1770         return -1;
1771     }
1772
1773     if (!cap_ppc_watchdog) {
1774         printf("warning: KVM does not support watchdog");
1775         return -1;
1776     }
1777
1778     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1779     if (ret < 0) {
1780         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1781                 __func__, strerror(-ret));
1782         return ret;
1783     }
1784
1785     return ret;
1786 }
1787
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy
 * it (the whole line, not just the value) into @value, a buffer of @len
 * bytes.
 *
 * Returns 0 when a matching line was found, -1 when the file cannot be
 * opened or no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int ret = -1;
    FILE *f = fopen("/proc/cpuinfo", "r");

    if (!f) {
        return -1;
    }

    while (fgets(line, sizeof(line), f)) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
        /* A line that reads back as an empty string ends the scan */
        if (line[0] == '\0') {
            break;
        }
    }

    fclose(f);

    return ret;
}
1815
1816 uint32_t kvmppc_get_tbfreq(void)
1817 {
1818     char line[512];
1819     char *ns;
1820     uint32_t retval = NANOSECONDS_PER_SECOND;
1821
1822     if (read_cpuinfo("timebase", line, sizeof(line))) {
1823         return retval;
1824     }
1825
1826     if (!(ns = strchr(line, ':'))) {
1827         return retval;
1828     }
1829
1830     ns++;
1831
1832     return atoi(ns);
1833 }
1834
/*
 * Read the contents of /proc/device-tree/system-id into a newly allocated
 * buffer stored in *@value.  Returns the result of g_file_get_contents().
 */
bool kvmppc_get_host_serial(char **value)
{
    static const char path[] = "/proc/device-tree/system-id";

    return g_file_get_contents(path, value, NULL, NULL);
}
1840
/*
 * Read the contents of /proc/device-tree/model into a newly allocated
 * buffer stored in *@value.  Returns the result of g_file_get_contents().
 */
bool kvmppc_get_host_model(char **value)
{
    static const char path[] = "/proc/device-tree/model";

    return g_file_get_contents(path, value, NULL, NULL);
}
1845
1846 /* Try to find a device tree node for a CPU with clock-frequency property */
1847 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1848 {
1849     struct dirent *dirp;
1850     DIR *dp;
1851
1852     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1853         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1854         return -1;
1855     }
1856
1857     buf[0] = '\0';
1858     while ((dirp = readdir(dp)) != NULL) {
1859         FILE *f;
1860         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1861                  dirp->d_name);
1862         f = fopen(buf, "r");
1863         if (f) {
1864             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1865             fclose(f);
1866             break;
1867         }
1868         buf[0] = '\0';
1869     }
1870     closedir(dp);
1871     if (buf[0] == '\0') {
1872         printf("Unknown host!\n");
1873         return -1;
1874     }
1875
1876     return 0;
1877 }
1878
/*
 * Read a big-endian integer property from the file @filename.
 *
 * Returns the value converted to host byte order when the file yields
 * exactly 4 or 8 bytes, (uint64_t)-1 when the file cannot be opened and
 * 0 for any other length.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    FILE *fp = fopen(filename, "rb");
    int nread;

    if (fp == NULL) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), fp);
    fclose(fp);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        /* property is a 64-bit quantity */
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1905
1906 /* Read a CPU node property from the host device tree that's a single
1907  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1908  * (can't find or open the property, or doesn't understand the
1909  * format) */
1910 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1911 {
1912     char buf[PATH_MAX], *tmp;
1913     uint64_t val;
1914
1915     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1916         return -1;
1917     }
1918
1919     tmp = g_strdup_printf("%s/%s", buf, propname);
1920     val = kvmppc_read_int_dt(tmp);
1921     g_free(tmp);
1922
1923     return val;
1924 }
1925
/*
 * Return the "clock-frequency" property of the host CPU device tree
 * node, as read by kvmppc_read_int_cpu_dt().
 */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
1930
/*
 * Return the "ibm,vmx" property of the host CPU device tree node.
 * The 64-bit result of kvmppc_read_int_cpu_dt() is narrowed to 32 bits,
 * so its -1 error value is seen by callers as (uint32_t)-1.
 */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,vmx");

    return prop;
}
1935
/*
 * Return the "ibm,dfp" property of the host CPU device tree node.
 * The 64-bit result of kvmppc_read_int_cpu_dt() is narrowed to 32 bits,
 * so its -1 error value is seen by callers as (uint32_t)-1.
 */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,dfp");

    return prop;
}
1940
1941 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1942  {
1943      PowerPCCPU *cpu = ppc_env_get_cpu(env);
1944      CPUState *cs = CPU(cpu);
1945
1946     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1947         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1948         return 0;
1949     }
1950
1951     return 1;
1952 }
1953
1954 int kvmppc_get_hasidle(CPUPPCState *env)
1955 {
1956     struct kvm_ppc_pvinfo pvinfo;
1957
1958     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1959         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1960         return 1;
1961     }
1962
1963     return 0;
1964 }
1965
1966 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1967 {
1968     uint32_t *hc = (uint32_t*)buf;
1969     struct kvm_ppc_pvinfo pvinfo;
1970
1971     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1972         memcpy(buf, pvinfo.hcall, buf_len);
1973         return 0;
1974     }
1975
1976     /*
1977      * Fallback to always fail hypercalls regardless of endianness:
1978      *
1979      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1980      *     li r3, -1
1981      *     b .+8       (becomes nop in wrong endian)
1982      *     bswap32(li r3, -1)
1983      */
1984
1985     hc[0] = cpu_to_be32(0x08000048);
1986     hc[1] = cpu_to_be32(0x3860ffff);
1987     hc[2] = cpu_to_be32(0x48000008);
1988     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1989
1990     return 1;
1991 }
1992
1993 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1994 {
1995     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1996 }
1997
1998 void kvmppc_enable_logical_ci_hcalls(void)
1999 {
2000     /*
2001      * FIXME: it would be nice if we could detect the cases where
2002      * we're using a device which requires the in kernel
2003      * implementation of these hcalls, but the kernel lacks them and
2004      * produce a warning.
2005      */
2006     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2007     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2008 }
2009
2010 void kvmppc_enable_set_mode_hcall(void)
2011 {
2012     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2013 }
2014
2015 void kvmppc_enable_clear_ref_mod_hcalls(void)
2016 {
2017     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2018     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2019 }
2020
2021 void kvmppc_set_papr(PowerPCCPU *cpu)
2022 {
2023     CPUState *cs = CPU(cpu);
2024     int ret;
2025
2026     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2027     if (ret) {
2028         error_report("This vCPU type or KVM version does not support PAPR");
2029         exit(1);
2030     }
2031
2032     /* Update the capability flag so we sync the right information
2033      * with kvm */
2034     cap_papr = 1;
2035 }
2036
2037 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2038 {
2039     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2040 }
2041
2042 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2043 {
2044     CPUState *cs = CPU(cpu);
2045     int ret;
2046
2047     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2048     if (ret && mpic_proxy) {
2049         error_report("This KVM version does not support EPR");
2050         exit(1);
2051     }
2052 }
2053
2054 int kvmppc_smt_threads(void)
2055 {
2056     return cap_ppc_smt ? cap_ppc_smt : 1;
2057 }
2058
2059 #ifdef TARGET_PPC64
2060 off_t kvmppc_alloc_rma(void **rma)
2061 {
2062     off_t size;
2063     int fd;
2064     struct kvm_allocate_rma ret;
2065
2066     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2067      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2068      *                      not necessary on this hardware
2069      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2070      *
2071      * FIXME: We should allow the user to force contiguous RMA
2072      * allocation in the cap_ppc_rma==1 case.
2073      */
2074     if (cap_ppc_rma < 2) {
2075         return 0;
2076     }
2077
2078     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2079     if (fd < 0) {
2080         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2081                 strerror(errno));
2082         return -1;
2083     }
2084
2085     size = MIN(ret.rma_size, 256ul << 20);
2086
2087     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2088     if (*rma == MAP_FAILED) {
2089         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2090         return -1;
2091     };
2092
2093     return size;
2094 }
2095
2096 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2097 {
2098     struct kvm_ppc_smmu_info info;
2099     long rampagesize, best_page_shift;
2100     int i;
2101
2102     if (cap_ppc_rma >= 2) {
2103         return current_size;
2104     }
2105
2106     /* Find the largest hardware supported page size that's less than
2107      * or equal to the (logical) backing page size of guest RAM */
2108     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2109     rampagesize = qemu_getrampagesize();
2110     best_page_shift = 0;
2111
2112     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2113         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2114
2115         if (!sps->page_shift) {
2116             continue;
2117         }
2118
2119         if ((sps->page_shift > best_page_shift)
2120             && ((1UL << sps->page_shift) <= rampagesize)) {
2121             best_page_shift = sps->page_shift;
2122         }
2123     }
2124
2125     return MIN(current_size,
2126                1ULL << (best_page_shift + hash_shift - 7));
2127 }
2128 #endif
2129
2130 bool kvmppc_spapr_use_multitce(void)
2131 {
2132     return cap_spapr_multitce;
2133 }
2134
2135 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2136                               bool need_vfio)
2137 {
2138     struct kvm_create_spapr_tce args = {
2139         .liobn = liobn,
2140         .window_size = window_size,
2141     };
2142     long len;
2143     int fd;
2144     void *table;
2145
2146     /* Must set fd to -1 so we don't try to munmap when called for
2147      * destroying the table, which the upper layers -will- do
2148      */
2149     *pfd = -1;
2150     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2151         return NULL;
2152     }
2153
2154     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2155     if (fd < 0) {
2156         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2157                 liobn);
2158         return NULL;
2159     }
2160
2161     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2162     /* FIXME: round this up to page size */
2163
2164     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2165     if (table == MAP_FAILED) {
2166         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2167                 liobn);
2168         close(fd);
2169         return NULL;
2170     }
2171
2172     *pfd = fd;
2173     return table;
2174 }
2175
/*
 * Unmap a TCE table of @nb_table 64-bit entries and close its fd.
 *
 * Returns -1 when @fd is negative (nothing is done) and 0 otherwise,
 * even if munmap() or close() failed; failures are only reported on
 * stderr.  The fd is closed even when munmap() fails, so that a failed
 * unmap does not leak the descriptor as well.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if (munmap(table, len) < 0) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }
    if (close(fd) < 0) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
    }

    return 0;
}
2194
2195 int kvmppc_reset_htab(int shift_hint)
2196 {
2197     uint32_t shift = shift_hint;
2198
2199     if (!kvm_enabled()) {
2200         /* Full emulation, tell caller to allocate htab itself */
2201         return 0;
2202     }
2203     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2204         int ret;
2205         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2206         if (ret == -ENOTTY) {
2207             /* At least some versions of PR KVM advertise the
2208              * capability, but don't implement the ioctl().  Oops.
2209              * Return 0 so that we allocate the htab in qemu, as is
2210              * correct for PR. */
2211             return 0;
2212         } else if (ret < 0) {
2213             return ret;
2214         }
2215         return shift;
2216     }
2217
2218     /* We have a kernel that predates the htab reset calls.  For PR
2219      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2220      * this era, it has allocated a 16MB fixed size hash table already. */
2221     if (kvmppc_is_pr(kvm_state)) {
2222         /* PR - tell caller to allocate htab */
2223         return 0;
2224     } else {
2225         /* HV - assume 16MB kernel allocated htab */
2226         return 24;
2227     }
2228 }
2229
2230 static inline uint32_t mfpvr(void)
2231 {
2232     uint32_t pvr;
2233
2234     asm ("mfpvr %0"
2235          : "=r"(pvr));
2236     return pvr;
2237 }
2238
/*
 * Set (@on true) or clear (@on false) the bits of @flags in *@word.
 */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    uint64_t current = *word;

    *word = on ? (current | flags) : (current & ~flags);
}
2247
2248 static void kvmppc_host_cpu_initfn(Object *obj)
2249 {
2250     assert(kvm_enabled());
2251 }
2252
2253 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2254 {
2255     DeviceClass *dc = DEVICE_CLASS(oc);
2256     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2257     uint32_t vmx = kvmppc_get_vmx();
2258     uint32_t dfp = kvmppc_get_dfp();
2259     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2260     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2261
2262     /* Now fix up the class with information we can query from the host */
2263     pcc->pvr = mfpvr();
2264
2265     if (vmx != -1) {
2266         /* Only override when we know what the host supports */
2267         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2268         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2269     }
2270     if (dfp != -1) {
2271         /* Only override when we know what the host supports */
2272         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2273     }
2274
2275     if (dcache_size != -1) {
2276         pcc->l1_dcache_size = dcache_size;
2277     }
2278
2279     if (icache_size != -1) {
2280         pcc->l1_icache_size = icache_size;
2281     }
2282
2283     /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2284     dc->cannot_destroy_with_object_finalize_yet = true;
2285 }
2286
2287 bool kvmppc_has_cap_epr(void)
2288 {
2289     return cap_epr;
2290 }
2291
2292 bool kvmppc_has_cap_htab_fd(void)
2293 {
2294     return cap_htab_fd;
2295 }
2296
2297 bool kvmppc_has_cap_fixup_hcalls(void)
2298 {
2299     return cap_fixup_hcalls;
2300 }
2301
2302 bool kvmppc_has_cap_htm(void)
2303 {
2304     return cap_htm;
2305 }
2306
2307 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2308 {
2309     ObjectClass *oc = OBJECT_CLASS(pcc);
2310
2311     while (oc && !object_class_is_abstract(oc)) {
2312         oc = object_class_get_parent(oc);
2313     }
2314     assert(oc);
2315
2316     return POWERPC_CPU_CLASS(oc);
2317 }
2318
2319 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2320 {
2321     uint32_t host_pvr = mfpvr();
2322     PowerPCCPUClass *pvr_pcc;
2323
2324     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2325     if (pvr_pcc == NULL) {
2326         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2327     }
2328
2329     return pvr_pcc;
2330 }
2331
2332 static int kvm_ppc_register_host_cpu_type(void)
2333 {
2334     TypeInfo type_info = {
2335         .name = TYPE_HOST_POWERPC_CPU,
2336         .instance_init = kvmppc_host_cpu_initfn,
2337         .class_init = kvmppc_host_cpu_class_init,
2338     };
2339     PowerPCCPUClass *pvr_pcc;
2340     DeviceClass *dc;
2341     int i;
2342
2343     pvr_pcc = kvm_ppc_get_host_cpu_class();
2344     if (pvr_pcc == NULL) {
2345         return -1;
2346     }
2347     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2348     type_register(&type_info);
2349
2350 #if defined(TARGET_PPC64)
2351     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2352     type_info.parent = TYPE_SPAPR_CPU_CORE,
2353     type_info.instance_size = sizeof(sPAPRCPUCore);
2354     type_info.instance_init = NULL;
2355     type_info.class_init = spapr_cpu_core_class_init;
2356     type_info.class_data = (void *) "host";
2357     type_register(&type_info);
2358     g_free((void *)type_info.name);
2359 #endif
2360
2361     /*
2362      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2363      * we want "POWER8" to be a "family" alias that points to the current
2364      * host CPU type, too)
2365      */
2366     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2367     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2368         if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2369             ObjectClass *oc = OBJECT_CLASS(pvr_pcc);
2370             char *suffix;
2371
2372             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2373             suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
2374             if (suffix) {
2375                 *suffix = 0;
2376             }
2377             ppc_cpu_aliases[i].oc = oc;
2378             break;
2379         }
2380     }
2381
2382     return 0;
2383 }
2384
2385 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2386 {
2387     struct kvm_rtas_token_args args = {
2388         .token = token,
2389     };
2390
2391     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2392         return -ENOENT;
2393     }
2394
2395     strncpy(args.name, function, sizeof(args.name));
2396
2397     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2398 }
2399
2400 int kvmppc_get_htab_fd(bool write)
2401 {
2402     struct kvm_get_htab_fd s = {
2403         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2404         .start_index = 0,
2405     };
2406
2407     if (!cap_htab_fd) {
2408         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2409         return -1;
2410     }
2411
2412     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2413 }
2414
2415 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2416 {
2417     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2418     uint8_t buf[bufsize];
2419     ssize_t rc;
2420
2421     do {
2422         rc = read(fd, buf, bufsize);
2423         if (rc < 0) {
2424             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2425                     strerror(errno));
2426             return rc;
2427         } else if (rc) {
2428             uint8_t *buffer = buf;
2429             ssize_t n = rc;
2430             while (n) {
2431                 struct kvm_get_htab_header *head =
2432                     (struct kvm_get_htab_header *) buffer;
2433                 size_t chunksize = sizeof(*head) +
2434                      HASH_PTE_SIZE_64 * head->n_valid;
2435
2436                 qemu_put_be32(f, head->index);
2437                 qemu_put_be16(f, head->n_valid);
2438                 qemu_put_be16(f, head->n_invalid);
2439                 qemu_put_buffer(f, (void *)(head + 1),
2440                                 HASH_PTE_SIZE_64 * head->n_valid);
2441
2442                 buffer += chunksize;
2443                 n -= chunksize;
2444             }
2445         }
2446     } while ((rc != 0)
2447              && ((max_ns < 0)
2448                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2449
2450     return (rc == 0) ? 1 : 0;
2451 }
2452
2453 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2454                            uint16_t n_valid, uint16_t n_invalid)
2455 {
2456     struct kvm_get_htab_header *buf;
2457     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2458     ssize_t rc;
2459
2460     buf = alloca(chunksize);
2461     buf->index = index;
2462     buf->n_valid = n_valid;
2463     buf->n_invalid = n_invalid;
2464
2465     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2466
2467     rc = write(fd, buf, chunksize);
2468     if (rc < 0) {
2469         fprintf(stderr, "Error writing KVM hash table: %s\n",
2470                 strerror(errno));
2471         return rc;
2472     }
2473     if (rc != chunksize) {
2474         /* We should never get a short write on a single chunk */
2475         fprintf(stderr, "Short write, restoring KVM hash table\n");
2476         return -1;
2477     }
2478     return 0;
2479 }
2480
2481 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2482 {
2483     return true;
2484 }
2485
2486 void kvm_arch_init_irq_routing(KVMState *s)
2487 {
2488 }
2489
2490 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2491 {
2492     struct kvm_get_htab_fd ghf = {
2493         .flags = 0,
2494         .start_index = ptex,
2495     };
2496     int fd, rc;
2497     int i;
2498
2499     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2500     if (fd < 0) {
2501         hw_error("kvmppc_read_hptes: Unable to open HPT fd");
2502     }
2503
2504     i = 0;
2505     while (i < n) {
2506         struct kvm_get_htab_header *hdr;
2507         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2508         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2509
2510         rc = read(fd, buf, sizeof(buf));
2511         if (rc < 0) {
2512             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2513         }
2514
2515         hdr = (struct kvm_get_htab_header *)buf;
2516         while ((i < n) && ((char *)hdr < (buf + rc))) {
2517             int invalid = hdr->n_invalid;
2518
2519             if (hdr->index != (ptex + i)) {
2520                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2521                          " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2522             }
2523
2524             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2525             i += hdr->n_valid;
2526
2527             if ((n - i) < invalid) {
2528                 invalid = n - i;
2529             }
2530             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2531             i += hdr->n_invalid;
2532
2533             hdr = (struct kvm_get_htab_header *)
2534                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2535         }
2536     }
2537
2538     close(fd);
2539 }
2540
2541 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2542 {
2543     int fd, rc;
2544     struct kvm_get_htab_fd ghf;
2545     struct {
2546         struct kvm_get_htab_header hdr;
2547         uint64_t pte0;
2548         uint64_t pte1;
2549     } buf;
2550
2551     ghf.flags = 0;
2552     ghf.start_index = 0;     /* Ignored */
2553     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2554     if (fd < 0) {
2555         hw_error("kvmppc_write_hpte: Unable to open HPT fd");
2556     }
2557
2558     buf.hdr.n_valid = 1;
2559     buf.hdr.n_invalid = 0;
2560     buf.hdr.index = ptex;
2561     buf.pte0 = cpu_to_be64(pte0);
2562     buf.pte1 = cpu_to_be64(pte1);
2563
2564     rc = write(fd, &buf, sizeof(buf));
2565     if (rc != sizeof(buf)) {
2566         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2567     }
2568     close(fd);
2569 }
2570
2571 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2572                              uint64_t address, uint32_t data, PCIDevice *dev)
2573 {
2574     return 0;
2575 }
2576
2577 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2578                                 int vector, PCIDevice *dev)
2579 {
2580     return 0;
2581 }
2582
/* Nothing to do here: always returns 0, whatever @virq is. */
int kvm_arch_release_virq_post(int virq)
{
    (void)virq;

    return 0;
}
2587
/* Return the low 16 bits of the MSI @data word. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t low16_mask = 0xffff;
    uint32_t gsi = data & low16_mask;

    return gsi;
}
2592
2593 int kvmppc_enable_hwrng(void)
2594 {
2595     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2596         return -1;
2597     }
2598
2599     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2600 }