2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
19 #include <sys/ioctl.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/error-report.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/kvm.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/ppc.h"
40 #include "sysemu/watchdog.h"
42 #include "exec/gdbstub.h"
43 #include "exec/memattrs.h"
44 #include "sysemu/hostmem.h"
/* Uncomment to get verbose KVM debug output on stderr. */
/* #define DEBUG_KVM */

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
/* Compiles to nothing, but still type-checks the arguments. */
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
56 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
58 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
62 static int cap_interrupt_unset
= false;
63 static int cap_interrupt_level
= false;
64 static int cap_segstate
;
65 static int cap_booke_sregs
;
66 static int cap_ppc_smt
;
67 static int cap_ppc_rma
;
68 static int cap_spapr_tce
;
69 static int cap_spapr_multitce
;
70 static int cap_spapr_vfio
;
72 static int cap_one_reg
;
74 static int cap_ppc_watchdog
;
76 static int cap_htab_fd
;
77 static int cap_fixup_hcalls
;
79 static uint32_t debug_inst_opcode
;
81 /* XXX We have a race condition where we actually have a level triggered
82 * interrupt, but the infrastructure can't expose that yet, so the guest
83 * takes but ignores it, goes to sleep and never gets notified that there's
84 * still an interrupt pending.
86 * As a quick workaround, let's just wake up again 20 ms after we injected
87 * an interrupt. That way we can assure that we're always reinjecting
88 * interrupts in case the guest swallowed them.
90 static QEMUTimer
*idle_timer
;
92 static void kvm_kick_cpu(void *opaque
)
94 PowerPCCPU
*cpu
= opaque
;
96 qemu_cpu_kick(CPU(cpu
));
99 static int kvm_ppc_register_host_cpu_type(void);
101 int kvm_arch_init(MachineState
*ms
, KVMState
*s
)
103 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
104 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
105 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
106 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
107 cap_ppc_smt
= kvm_check_extension(s
, KVM_CAP_PPC_SMT
);
108 cap_ppc_rma
= kvm_check_extension(s
, KVM_CAP_PPC_RMA
);
109 cap_spapr_tce
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE
);
110 cap_spapr_multitce
= kvm_check_extension(s
, KVM_CAP_SPAPR_MULTITCE
);
111 cap_spapr_vfio
= false;
112 cap_one_reg
= kvm_check_extension(s
, KVM_CAP_ONE_REG
);
113 cap_hior
= kvm_check_extension(s
, KVM_CAP_PPC_HIOR
);
114 cap_epr
= kvm_check_extension(s
, KVM_CAP_PPC_EPR
);
115 cap_ppc_watchdog
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_WATCHDOG
);
116 /* Note: we don't set cap_papr here, because this capability is
117 * only activated after this by kvmppc_set_papr() */
118 cap_htab_fd
= kvm_check_extension(s
, KVM_CAP_PPC_HTAB_FD
);
119 cap_fixup_hcalls
= kvm_check_extension(s
, KVM_CAP_PPC_FIXUP_HCALL
);
121 if (!cap_interrupt_level
) {
122 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
123 "VM to stall at times!\n");
126 kvm_ppc_register_host_cpu_type();
131 static int kvm_arch_sync_sregs(PowerPCCPU
*cpu
)
133 CPUPPCState
*cenv
= &cpu
->env
;
134 CPUState
*cs
= CPU(cpu
);
135 struct kvm_sregs sregs
;
138 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
139 /* What we're really trying to say is "if we're on BookE, we use
140 the native PVR for now". This is the only sane way to check
141 it though, so we potentially confuse users that they can run
142 BookE guests on BookS. Let's hope nobody dares enough :) */
146 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
151 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
156 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
157 return kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
160 /* Set up a shared TLB array with KVM */
161 static int kvm_booke206_tlb_init(PowerPCCPU
*cpu
)
163 CPUPPCState
*env
= &cpu
->env
;
164 CPUState
*cs
= CPU(cpu
);
165 struct kvm_book3e_206_tlb_params params
= {};
166 struct kvm_config_tlb cfg
= {};
167 unsigned int entries
= 0;
170 if (!kvm_enabled() ||
171 !kvm_check_extension(cs
->kvm_state
, KVM_CAP_SW_TLB
)) {
175 assert(ARRAY_SIZE(params
.tlb_sizes
) == BOOKE206_MAX_TLBN
);
177 for (i
= 0; i
< BOOKE206_MAX_TLBN
; i
++) {
178 params
.tlb_sizes
[i
] = booke206_tlb_size(env
, i
);
179 params
.tlb_ways
[i
] = booke206_tlb_ways(env
, i
);
180 entries
+= params
.tlb_sizes
[i
];
183 assert(entries
== env
->nb_tlb
);
184 assert(sizeof(struct kvm_book3e_206_tlb_entry
) == sizeof(ppcmas_tlb_t
));
186 env
->tlb_dirty
= true;
188 cfg
.array
= (uintptr_t)env
->tlb
.tlbm
;
189 cfg
.array_len
= sizeof(ppcmas_tlb_t
) * entries
;
190 cfg
.params
= (uintptr_t)¶ms
;
191 cfg
.mmu_type
= KVM_MMU_FSL_BOOKE_NOHV
;
193 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_SW_TLB
, 0, (uintptr_t)&cfg
);
195 fprintf(stderr
, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
196 __func__
, strerror(-ret
));
200 env
->kvm_sw_tlb
= true;
205 #if defined(TARGET_PPC64)
206 static void kvm_get_fallback_smmu_info(PowerPCCPU
*cpu
,
207 struct kvm_ppc_smmu_info
*info
)
209 CPUPPCState
*env
= &cpu
->env
;
210 CPUState
*cs
= CPU(cpu
);
212 memset(info
, 0, sizeof(*info
));
214 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
215 * need to "guess" what the supported page sizes are.
217 * For that to work we make a few assumptions:
219 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
220 * KVM which only supports 4K and 16M pages, but supports them
221 * regardless of the backing store characteritics. We also don't
222 * support 1T segments.
224 * This is safe as if HV KVM ever supports that capability or PR
225 * KVM grows supports for more page/segment sizes, those versions
226 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
227 * will not hit this fallback
229 * - Else we are running HV KVM. This means we only support page
230 * sizes that fit in the backing store. Additionally we only
231 * advertize 64K pages if the processor is ARCH 2.06 and we assume
232 * P7 encodings for the SLB and hash table. Here too, we assume
233 * support for any newer processor will mean a kernel that
234 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
237 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
242 /* Standard 4k base page size segment */
243 info
->sps
[0].page_shift
= 12;
244 info
->sps
[0].slb_enc
= 0;
245 info
->sps
[0].enc
[0].page_shift
= 12;
246 info
->sps
[0].enc
[0].pte_enc
= 0;
248 /* Standard 16M large page size segment */
249 info
->sps
[1].page_shift
= 24;
250 info
->sps
[1].slb_enc
= SLB_VSID_L
;
251 info
->sps
[1].enc
[0].page_shift
= 24;
252 info
->sps
[1].enc
[0].pte_enc
= 0;
256 /* HV KVM has backing store size restrictions */
257 info
->flags
= KVM_PPC_PAGE_SIZES_REAL
;
259 if (env
->mmu_model
& POWERPC_MMU_1TSEG
) {
260 info
->flags
|= KVM_PPC_1T_SEGMENTS
;
263 if (env
->mmu_model
== POWERPC_MMU_2_06
||
264 env
->mmu_model
== POWERPC_MMU_2_07
) {
270 /* Standard 4k base page size segment */
271 info
->sps
[i
].page_shift
= 12;
272 info
->sps
[i
].slb_enc
= 0;
273 info
->sps
[i
].enc
[0].page_shift
= 12;
274 info
->sps
[i
].enc
[0].pte_enc
= 0;
277 /* 64K on MMU 2.06 and later */
278 if (env
->mmu_model
== POWERPC_MMU_2_06
||
279 env
->mmu_model
== POWERPC_MMU_2_07
) {
280 info
->sps
[i
].page_shift
= 16;
281 info
->sps
[i
].slb_enc
= 0x110;
282 info
->sps
[i
].enc
[0].page_shift
= 16;
283 info
->sps
[i
].enc
[0].pte_enc
= 1;
287 /* Standard 16M large page size segment */
288 info
->sps
[i
].page_shift
= 24;
289 info
->sps
[i
].slb_enc
= SLB_VSID_L
;
290 info
->sps
[i
].enc
[0].page_shift
= 24;
291 info
->sps
[i
].enc
[0].pte_enc
= 0;
295 static void kvm_get_smmu_info(PowerPCCPU
*cpu
, struct kvm_ppc_smmu_info
*info
)
297 CPUState
*cs
= CPU(cpu
);
300 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
301 ret
= kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
307 kvm_get_fallback_smmu_info(cpu
, info
);
/* Return the page size backing mem_path: the filesystem block size when
 * it lives on hugetlbfs, the normal host page size otherwise.  Exits on
 * statfs() failure. */
static long gethugepagesize(const char *mem_path)
{
    struct statfs fs;
    int ret;

    /* statfs() may be interrupted by a signal; retry on EINTR. */
    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}
336 static int find_max_supported_pagesize(Object
*obj
, void *opaque
)
339 long *hpsize_min
= opaque
;
341 if (object_dynamic_cast(obj
, TYPE_MEMORY_BACKEND
)) {
342 mem_path
= object_property_get_str(obj
, "mem-path", NULL
);
344 long hpsize
= gethugepagesize(mem_path
);
345 if (hpsize
< *hpsize_min
) {
346 *hpsize_min
= hpsize
;
349 *hpsize_min
= getpagesize();
356 static long getrampagesize(void)
358 long hpsize
= LONG_MAX
;
362 return gethugepagesize(mem_path
);
365 /* it's possible we have memory-backend objects with
366 * hugepage-backed RAM. these may get mapped into system
367 * address space via -numa parameters or memory hotplug
368 * hooks. we want to take these into account, but we
369 * also want to make sure these supported hugepage
370 * sizes are applicable across the entire range of memory
371 * we may boot from, so we take the min across all
372 * backends, and assume normal pages in cases where a
373 * backend isn't backed by hugepages.
375 memdev_root
= object_resolve_path("/objects", NULL
);
377 return getpagesize();
380 object_child_foreach(memdev_root
, find_max_supported_pagesize
, &hpsize
);
382 return (hpsize
== LONG_MAX
) ? getpagesize() : hpsize
;
385 static bool kvm_valid_page_size(uint32_t flags
, long rampgsize
, uint32_t shift
)
387 if (!(flags
& KVM_PPC_PAGE_SIZES_REAL
)) {
391 return (1ul << shift
) <= rampgsize
;
394 static void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
396 static struct kvm_ppc_smmu_info smmu_info
;
397 static bool has_smmu_info
;
398 CPUPPCState
*env
= &cpu
->env
;
402 /* We only handle page sizes for 64-bit server guests for now */
403 if (!(env
->mmu_model
& POWERPC_MMU_64
)) {
407 /* Collect MMU info from kernel if not already */
408 if (!has_smmu_info
) {
409 kvm_get_smmu_info(cpu
, &smmu_info
);
410 has_smmu_info
= true;
413 rampagesize
= getrampagesize();
415 /* Convert to QEMU form */
416 memset(&env
->sps
, 0, sizeof(env
->sps
));
418 /* If we have HV KVM, we need to forbid CI large pages if our
419 * host page size is smaller than 64K.
421 if (smmu_info
.flags
& KVM_PPC_PAGE_SIZES_REAL
) {
422 env
->ci_large_pages
= getpagesize() >= 0x10000;
426 * XXX This loop should be an entry wide AND of the capabilities that
427 * the selected CPU has with the capabilities that KVM supports.
429 for (ik
= iq
= 0; ik
< KVM_PPC_PAGE_SIZES_MAX_SZ
; ik
++) {
430 struct ppc_one_seg_page_size
*qsps
= &env
->sps
.sps
[iq
];
431 struct kvm_ppc_one_seg_page_size
*ksps
= &smmu_info
.sps
[ik
];
433 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
437 qsps
->page_shift
= ksps
->page_shift
;
438 qsps
->slb_enc
= ksps
->slb_enc
;
439 for (jk
= jq
= 0; jk
< KVM_PPC_PAGE_SIZES_MAX_SZ
; jk
++) {
440 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
441 ksps
->enc
[jk
].page_shift
)) {
444 qsps
->enc
[jq
].page_shift
= ksps
->enc
[jk
].page_shift
;
445 qsps
->enc
[jq
].pte_enc
= ksps
->enc
[jk
].pte_enc
;
446 if (++jq
>= PPC_PAGE_SIZES_MAX_SZ
) {
450 if (++iq
>= PPC_PAGE_SIZES_MAX_SZ
) {
454 env
->slb_nr
= smmu_info
.slb_size
;
455 if (!(smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
)) {
456 env
->mmu_model
&= ~POWERPC_MMU_1TSEG
;
459 #else /* defined (TARGET_PPC64) */
461 static inline void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
465 #endif /* !defined (TARGET_PPC64) */
467 unsigned long kvm_arch_vcpu_id(CPUState
*cpu
)
469 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu
));
472 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
473 * book3s supports only 1 watchpoint, so array size
474 * of 4 is sufficient for now.
476 #define MAX_HW_BKPTS 4
478 static struct HWBreakpoint
{
481 } hw_debug_points
[MAX_HW_BKPTS
];
483 static CPUWatchpoint hw_watchpoint
;
485 /* Default there is no breakpoint and watchpoint supported */
486 static int max_hw_breakpoint
;
487 static int max_hw_watchpoint
;
488 static int nb_hw_breakpoint
;
489 static int nb_hw_watchpoint
;
491 static void kvmppc_hw_debug_points_init(CPUPPCState
*cenv
)
493 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
494 max_hw_breakpoint
= 2;
495 max_hw_watchpoint
= 2;
498 if ((max_hw_breakpoint
+ max_hw_watchpoint
) > MAX_HW_BKPTS
) {
499 fprintf(stderr
, "Error initializing h/w breakpoints\n");
504 int kvm_arch_init_vcpu(CPUState
*cs
)
506 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
507 CPUPPCState
*cenv
= &cpu
->env
;
510 /* Gather server mmu info from KVM and update the CPU state */
511 kvm_fixup_page_sizes(cpu
);
513 /* Synchronize sregs with kvm */
514 ret
= kvm_arch_sync_sregs(cpu
);
516 if (ret
== -EINVAL
) {
517 error_report("Register sync failed... If you're using kvm-hv.ko,"
518 " only \"-cpu host\" is possible");
523 idle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
, kvm_kick_cpu
, cpu
);
525 /* Some targets support access to KVM's guest TLB. */
526 switch (cenv
->mmu_model
) {
527 case POWERPC_MMU_BOOKE206
:
528 ret
= kvm_booke206_tlb_init(cpu
);
534 kvm_get_one_reg(cs
, KVM_REG_PPC_DEBUG_INST
, &debug_inst_opcode
);
535 kvmppc_hw_debug_points_init(cenv
);
540 static void kvm_sw_tlb_put(PowerPCCPU
*cpu
)
542 CPUPPCState
*env
= &cpu
->env
;
543 CPUState
*cs
= CPU(cpu
);
544 struct kvm_dirty_tlb dirty_tlb
;
545 unsigned char *bitmap
;
548 if (!env
->kvm_sw_tlb
) {
552 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
553 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
555 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
556 dirty_tlb
.num_dirty
= env
->nb_tlb
;
558 ret
= kvm_vcpu_ioctl(cs
, KVM_DIRTY_TLB
, &dirty_tlb
);
560 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
561 __func__
, strerror(-ret
));
567 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
569 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
570 CPUPPCState
*env
= &cpu
->env
;
575 struct kvm_one_reg reg
= {
577 .addr
= (uintptr_t) &val
,
581 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
583 trace_kvm_failed_spr_get(spr
, strerror(errno
));
585 switch (id
& KVM_REG_SIZE_MASK
) {
586 case KVM_REG_SIZE_U32
:
587 env
->spr
[spr
] = val
.u32
;
590 case KVM_REG_SIZE_U64
:
591 env
->spr
[spr
] = val
.u64
;
595 /* Don't handle this size yet */
601 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
603 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
604 CPUPPCState
*env
= &cpu
->env
;
609 struct kvm_one_reg reg
= {
611 .addr
= (uintptr_t) &val
,
615 switch (id
& KVM_REG_SIZE_MASK
) {
616 case KVM_REG_SIZE_U32
:
617 val
.u32
= env
->spr
[spr
];
620 case KVM_REG_SIZE_U64
:
621 val
.u64
= env
->spr
[spr
];
625 /* Don't handle this size yet */
629 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
631 trace_kvm_failed_spr_set(spr
, strerror(errno
));
635 static int kvm_put_fp(CPUState
*cs
)
637 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
638 CPUPPCState
*env
= &cpu
->env
;
639 struct kvm_one_reg reg
;
643 if (env
->insns_flags
& PPC_FLOAT
) {
644 uint64_t fpscr
= env
->fpscr
;
645 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
647 reg
.id
= KVM_REG_PPC_FPSCR
;
648 reg
.addr
= (uintptr_t)&fpscr
;
649 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
651 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
655 for (i
= 0; i
< 32; i
++) {
658 #ifdef HOST_WORDS_BIGENDIAN
659 vsr
[0] = float64_val(env
->fpr
[i
]);
660 vsr
[1] = env
->vsr
[i
];
662 vsr
[0] = env
->vsr
[i
];
663 vsr
[1] = float64_val(env
->fpr
[i
]);
665 reg
.addr
= (uintptr_t) &vsr
;
666 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
668 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
670 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
677 if (env
->insns_flags
& PPC_ALTIVEC
) {
678 reg
.id
= KVM_REG_PPC_VSCR
;
679 reg
.addr
= (uintptr_t)&env
->vscr
;
680 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
682 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno
));
686 for (i
= 0; i
< 32; i
++) {
687 reg
.id
= KVM_REG_PPC_VR(i
);
688 reg
.addr
= (uintptr_t)&env
->avr
[i
];
689 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
691 DPRINTF("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
700 static int kvm_get_fp(CPUState
*cs
)
702 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
703 CPUPPCState
*env
= &cpu
->env
;
704 struct kvm_one_reg reg
;
708 if (env
->insns_flags
& PPC_FLOAT
) {
710 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
712 reg
.id
= KVM_REG_PPC_FPSCR
;
713 reg
.addr
= (uintptr_t)&fpscr
;
714 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
716 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
722 for (i
= 0; i
< 32; i
++) {
725 reg
.addr
= (uintptr_t) &vsr
;
726 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
728 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
730 DPRINTF("Unable to get %s%d from KVM: %s\n",
731 vsx
? "VSR" : "FPR", i
, strerror(errno
));
734 #ifdef HOST_WORDS_BIGENDIAN
735 env
->fpr
[i
] = vsr
[0];
737 env
->vsr
[i
] = vsr
[1];
740 env
->fpr
[i
] = vsr
[1];
742 env
->vsr
[i
] = vsr
[0];
749 if (env
->insns_flags
& PPC_ALTIVEC
) {
750 reg
.id
= KVM_REG_PPC_VSCR
;
751 reg
.addr
= (uintptr_t)&env
->vscr
;
752 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
754 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno
));
758 for (i
= 0; i
< 32; i
++) {
759 reg
.id
= KVM_REG_PPC_VR(i
);
760 reg
.addr
= (uintptr_t)&env
->avr
[i
];
761 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
763 DPRINTF("Unable to get VR%d from KVM: %s\n",
773 #if defined(TARGET_PPC64)
774 static int kvm_get_vpa(CPUState
*cs
)
776 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
777 CPUPPCState
*env
= &cpu
->env
;
778 struct kvm_one_reg reg
;
781 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
782 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
783 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
785 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno
));
789 assert((uintptr_t)&env
->slb_shadow_size
790 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
791 reg
.id
= KVM_REG_PPC_VPA_SLB
;
792 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
793 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
795 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
800 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
801 reg
.id
= KVM_REG_PPC_VPA_DTL
;
802 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
803 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
805 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
813 static int kvm_put_vpa(CPUState
*cs
)
815 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
816 CPUPPCState
*env
= &cpu
->env
;
817 struct kvm_one_reg reg
;
820 /* SLB shadow or DTL can't be registered unless a master VPA is
821 * registered. That means when restoring state, if a VPA *is*
822 * registered, we need to set that up first. If not, we need to
823 * deregister the others before deregistering the master VPA */
824 assert(env
->vpa_addr
|| !(env
->slb_shadow_addr
|| env
->dtl_addr
));
827 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
828 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
829 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
831 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
836 assert((uintptr_t)&env
->slb_shadow_size
837 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
838 reg
.id
= KVM_REG_PPC_VPA_SLB
;
839 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
840 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
842 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
846 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
847 reg
.id
= KVM_REG_PPC_VPA_DTL
;
848 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
849 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
851 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
856 if (!env
->vpa_addr
) {
857 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
858 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
859 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
861 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
868 #endif /* TARGET_PPC64 */
870 int kvmppc_put_books_sregs(PowerPCCPU
*cpu
)
872 CPUPPCState
*env
= &cpu
->env
;
873 struct kvm_sregs sregs
;
876 sregs
.pvr
= env
->spr
[SPR_PVR
];
878 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
882 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
883 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
884 if (env
->slb
[i
].esid
& SLB_ESID_V
) {
885 sregs
.u
.s
.ppc64
.slb
[i
].slbe
|= i
;
887 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
892 for (i
= 0; i
< 16; i
++) {
893 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
897 for (i
= 0; i
< 8; i
++) {
898 /* Beware. We have to swap upper and lower bits here */
899 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
901 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
905 return kvm_vcpu_ioctl(CPU(cpu
), KVM_SET_SREGS
, &sregs
);
908 int kvm_arch_put_registers(CPUState
*cs
, int level
)
910 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
911 CPUPPCState
*env
= &cpu
->env
;
912 struct kvm_regs regs
;
916 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
923 regs
.xer
= cpu_read_xer(env
);
927 regs
.srr0
= env
->spr
[SPR_SRR0
];
928 regs
.srr1
= env
->spr
[SPR_SRR1
];
930 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
931 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
932 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
933 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
934 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
935 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
936 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
937 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
939 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
941 for (i
= 0;i
< 32; i
++)
942 regs
.gpr
[i
] = env
->gpr
[i
];
945 for (i
= 0; i
< 8; i
++) {
946 regs
.cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
949 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
955 if (env
->tlb_dirty
) {
957 env
->tlb_dirty
= false;
960 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
961 ret
= kvmppc_put_books_sregs(cpu
);
967 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
968 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
974 /* We deliberately ignore errors here, for kernels which have
975 * the ONE_REG calls, but don't support the specific
976 * registers, there's a reasonable chance things will still
977 * work, at least until we try to migrate. */
978 for (i
= 0; i
< 1024; i
++) {
979 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
982 kvm_put_one_spr(cs
, id
, i
);
988 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
989 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
991 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
992 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
994 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
995 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
996 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
997 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
998 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
999 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1000 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1001 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1002 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1003 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1007 if (kvm_put_vpa(cs
) < 0) {
1008 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1012 kvm_set_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1013 #endif /* TARGET_PPC64 */
1019 static void kvm_sync_excp(CPUPPCState
*env
, int vector
, int ivor
)
1021 env
->excp_vectors
[vector
] = env
->spr
[ivor
] + env
->spr
[SPR_BOOKE_IVPR
];
1024 static int kvmppc_get_booke_sregs(PowerPCCPU
*cpu
)
1026 CPUPPCState
*env
= &cpu
->env
;
1027 struct kvm_sregs sregs
;
1030 ret
= kvm_vcpu_ioctl(CPU(cpu
), KVM_GET_SREGS
, &sregs
);
1035 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
1036 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
1037 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
1038 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
1039 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
1040 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
1041 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
1042 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
1043 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
1044 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
1045 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
1046 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
1049 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
1050 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
1051 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
1052 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
1053 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
1054 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
1057 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
1058 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
1061 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
1062 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
1065 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
1066 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
1067 kvm_sync_excp(env
, POWERPC_EXCP_CRITICAL
, SPR_BOOKE_IVOR0
);
1068 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
1069 kvm_sync_excp(env
, POWERPC_EXCP_MCHECK
, SPR_BOOKE_IVOR1
);
1070 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
1071 kvm_sync_excp(env
, POWERPC_EXCP_DSI
, SPR_BOOKE_IVOR2
);
1072 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
1073 kvm_sync_excp(env
, POWERPC_EXCP_ISI
, SPR_BOOKE_IVOR3
);
1074 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
1075 kvm_sync_excp(env
, POWERPC_EXCP_EXTERNAL
, SPR_BOOKE_IVOR4
);
1076 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
1077 kvm_sync_excp(env
, POWERPC_EXCP_ALIGN
, SPR_BOOKE_IVOR5
);
1078 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
1079 kvm_sync_excp(env
, POWERPC_EXCP_PROGRAM
, SPR_BOOKE_IVOR6
);
1080 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
1081 kvm_sync_excp(env
, POWERPC_EXCP_FPU
, SPR_BOOKE_IVOR7
);
1082 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
1083 kvm_sync_excp(env
, POWERPC_EXCP_SYSCALL
, SPR_BOOKE_IVOR8
);
1084 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
1085 kvm_sync_excp(env
, POWERPC_EXCP_APU
, SPR_BOOKE_IVOR9
);
1086 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
1087 kvm_sync_excp(env
, POWERPC_EXCP_DECR
, SPR_BOOKE_IVOR10
);
1088 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
1089 kvm_sync_excp(env
, POWERPC_EXCP_FIT
, SPR_BOOKE_IVOR11
);
1090 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
1091 kvm_sync_excp(env
, POWERPC_EXCP_WDT
, SPR_BOOKE_IVOR12
);
1092 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
1093 kvm_sync_excp(env
, POWERPC_EXCP_DTLB
, SPR_BOOKE_IVOR13
);
1094 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
1095 kvm_sync_excp(env
, POWERPC_EXCP_ITLB
, SPR_BOOKE_IVOR14
);
1096 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
1097 kvm_sync_excp(env
, POWERPC_EXCP_DEBUG
, SPR_BOOKE_IVOR15
);
1099 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
1100 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
1101 kvm_sync_excp(env
, POWERPC_EXCP_SPEU
, SPR_BOOKE_IVOR32
);
1102 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
1103 kvm_sync_excp(env
, POWERPC_EXCP_EFPDI
, SPR_BOOKE_IVOR33
);
1104 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
1105 kvm_sync_excp(env
, POWERPC_EXCP_EFPRI
, SPR_BOOKE_IVOR34
);
1108 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
1109 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
1110 kvm_sync_excp(env
, POWERPC_EXCP_EPERFM
, SPR_BOOKE_IVOR35
);
1113 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
1114 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
1115 kvm_sync_excp(env
, POWERPC_EXCP_DOORI
, SPR_BOOKE_IVOR36
);
1116 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
1117 kvm_sync_excp(env
, POWERPC_EXCP_DOORCI
, SPR_BOOKE_IVOR37
);
1121 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
1122 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
1123 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
1124 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
1125 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
1126 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
1127 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
1128 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
1129 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
1130 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
1131 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
1134 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
1135 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1138 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1139 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1140 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1143 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1144 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1145 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1146 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1148 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1149 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1150 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1157 static int kvmppc_get_books_sregs(PowerPCCPU
*cpu
)
1159 CPUPPCState
*env
= &cpu
->env
;
1160 struct kvm_sregs sregs
;
1164 ret
= kvm_vcpu_ioctl(CPU(cpu
), KVM_GET_SREGS
, &sregs
);
1169 if (!env
->external_htab
) {
1170 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1176 * The packed SLB array we get from KVM_GET_SREGS only contains
1177 * information about valid entries. So we flush our internal copy
1178 * to get rid of stale ones, then put all valid SLB entries back
1181 memset(env
->slb
, 0, sizeof(env
->slb
));
1182 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
1183 target_ulong rb
= sregs
.u
.s
.ppc64
.slb
[i
].slbe
;
1184 target_ulong rs
= sregs
.u
.s
.ppc64
.slb
[i
].slbv
;
1186 * Only restore valid entries
1188 if (rb
& SLB_ESID_V
) {
1189 ppc_store_slb(cpu
, rb
& 0xfff, rb
& ~0xfffULL
, rs
);
1195 for (i
= 0; i
< 16; i
++) {
1196 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1200 for (i
= 0; i
< 8; i
++) {
1201 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1202 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1203 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1204 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1210 int kvm_arch_get_registers(CPUState
*cs
)
1212 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1213 CPUPPCState
*env
= &cpu
->env
;
1214 struct kvm_regs regs
;
1218 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
1223 for (i
= 7; i
>= 0; i
--) {
1224 env
->crf
[i
] = cr
& 15;
1228 env
->ctr
= regs
.ctr
;
1230 cpu_write_xer(env
, regs
.xer
);
1231 env
->msr
= regs
.msr
;
1234 env
->spr
[SPR_SRR0
] = regs
.srr0
;
1235 env
->spr
[SPR_SRR1
] = regs
.srr1
;
1237 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
1238 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
1239 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
1240 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
1241 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
1242 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
1243 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
1244 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
1246 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
1248 for (i
= 0;i
< 32; i
++)
1249 env
->gpr
[i
] = regs
.gpr
[i
];
1253 if (cap_booke_sregs
) {
1254 ret
= kvmppc_get_booke_sregs(cpu
);
1261 ret
= kvmppc_get_books_sregs(cpu
);
1268 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1274 /* We deliberately ignore errors here, for kernels which have
1275 * the ONE_REG calls, but don't support the specific
1276 * registers, there's a reasonable chance things will still
1277 * work, at least until we try to migrate. */
1278 for (i
= 0; i
< 1024; i
++) {
1279 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1282 kvm_get_one_spr(cs
, id
, i
);
1288 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
1289 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
1291 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
1292 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
1294 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
1295 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
1296 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
1297 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
1298 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
1299 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1300 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1301 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1302 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1303 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1307 if (kvm_get_vpa(cs
) < 0) {
1308 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1312 kvm_get_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1319 int kvmppc_set_interrupt(PowerPCCPU
*cpu
, int irq
, int level
)
1321 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
1323 if (irq
!= PPC_INTERRUPT_EXT
) {
1327 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
1331 kvm_vcpu_ioctl(CPU(cpu
), KVM_INTERRUPT
, &virq
);
1336 #if defined(TARGET_PPCEMB)
1337 #define PPC_INPUT_INT PPC40x_INPUT_INT
1338 #elif defined(TARGET_PPC64)
1339 #define PPC_INPUT_INT PPC970_INPUT_INT
1341 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1344 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1346 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1347 CPUPPCState
*env
= &cpu
->env
;
1351 qemu_mutex_lock_iothread();
1353 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1354 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1355 if (!cap_interrupt_level
&&
1356 run
->ready_for_interrupt_injection
&&
1357 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1358 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1360 /* For now KVM disregards the 'irq' argument. However, in the
1361 * future KVM could cache it in-kernel to avoid a heavyweight exit
1362 * when reading the UIC.
1364 irq
= KVM_INTERRUPT_SET
;
1366 DPRINTF("injected interrupt %d\n", irq
);
1367 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1369 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1372 /* Always wake up soon in case the interrupt was level based */
1373 timer_mod(idle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
1374 (NANOSECONDS_PER_SECOND
/ 50));
1377 /* We don't know if there are more interrupts pending after this. However,
1378 * the guest will return to userspace in the course of handling this one
1379 * anyways, so we will get a chance to deliver the rest. */
1381 qemu_mutex_unlock_iothread();
1384 MemTxAttrs
kvm_arch_post_run(CPUState
*cs
, struct kvm_run
*run
)
1386 return MEMTXATTRS_UNSPECIFIED
;
1389 int kvm_arch_process_async_events(CPUState
*cs
)
1394 static int kvmppc_handle_halt(PowerPCCPU
*cpu
)
1396 CPUState
*cs
= CPU(cpu
);
1397 CPUPPCState
*env
= &cpu
->env
;
1399 if (!(cs
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
1401 cs
->exception_index
= EXCP_HLT
;
1407 /* map dcr access to existing qemu dcr emulation */
1408 static int kvmppc_handle_dcr_read(CPUPPCState
*env
, uint32_t dcrn
, uint32_t *data
)
1410 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
1411 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
1416 static int kvmppc_handle_dcr_write(CPUPPCState
*env
, uint32_t dcrn
, uint32_t data
)
1418 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
1419 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
1424 int kvm_arch_insert_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1426 /* Mixed endian case is not handled */
1427 uint32_t sc
= debug_inst_opcode
;
1429 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1431 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 1)) {
1438 int kvm_arch_remove_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1442 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 0) ||
1443 sc
!= debug_inst_opcode
||
1444 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1452 static int find_hw_breakpoint(target_ulong addr
, int type
)
1456 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1457 <= ARRAY_SIZE(hw_debug_points
));
1459 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1460 if (hw_debug_points
[n
].addr
== addr
&&
1461 hw_debug_points
[n
].type
== type
) {
1469 static int find_hw_watchpoint(target_ulong addr
, int *flag
)
1473 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_ACCESS
);
1475 *flag
= BP_MEM_ACCESS
;
1479 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_WRITE
);
1481 *flag
= BP_MEM_WRITE
;
1485 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_READ
);
1487 *flag
= BP_MEM_READ
;
1494 int kvm_arch_insert_hw_breakpoint(target_ulong addr
,
1495 target_ulong len
, int type
)
1497 if ((nb_hw_breakpoint
+ nb_hw_watchpoint
) >= ARRAY_SIZE(hw_debug_points
)) {
1501 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].addr
= addr
;
1502 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].type
= type
;
1505 case GDB_BREAKPOINT_HW
:
1506 if (nb_hw_breakpoint
>= max_hw_breakpoint
) {
1510 if (find_hw_breakpoint(addr
, type
) >= 0) {
1517 case GDB_WATCHPOINT_WRITE
:
1518 case GDB_WATCHPOINT_READ
:
1519 case GDB_WATCHPOINT_ACCESS
:
1520 if (nb_hw_watchpoint
>= max_hw_watchpoint
) {
1524 if (find_hw_breakpoint(addr
, type
) >= 0) {
1538 int kvm_arch_remove_hw_breakpoint(target_ulong addr
,
1539 target_ulong len
, int type
)
1543 n
= find_hw_breakpoint(addr
, type
);
1549 case GDB_BREAKPOINT_HW
:
1553 case GDB_WATCHPOINT_WRITE
:
1554 case GDB_WATCHPOINT_READ
:
1555 case GDB_WATCHPOINT_ACCESS
:
1562 hw_debug_points
[n
] = hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
];
1567 void kvm_arch_remove_all_hw_breakpoints(void)
1569 nb_hw_breakpoint
= nb_hw_watchpoint
= 0;
1572 void kvm_arch_update_guest_debug(CPUState
*cs
, struct kvm_guest_debug
*dbg
)
1576 /* Software Breakpoint updates */
1577 if (kvm_sw_breakpoints_active(cs
)) {
1578 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_SW_BP
;
1581 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1582 <= ARRAY_SIZE(hw_debug_points
));
1583 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
) <= ARRAY_SIZE(dbg
->arch
.bp
));
1585 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1586 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_HW_BP
;
1587 memset(dbg
->arch
.bp
, 0, sizeof(dbg
->arch
.bp
));
1588 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1589 switch (hw_debug_points
[n
].type
) {
1590 case GDB_BREAKPOINT_HW
:
1591 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_BREAKPOINT
;
1593 case GDB_WATCHPOINT_WRITE
:
1594 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
;
1596 case GDB_WATCHPOINT_READ
:
1597 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_READ
;
1599 case GDB_WATCHPOINT_ACCESS
:
1600 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
|
1601 KVMPPC_DEBUG_WATCH_READ
;
1604 cpu_abort(cs
, "Unsupported breakpoint type\n");
1606 dbg
->arch
.bp
[n
].addr
= hw_debug_points
[n
].addr
;
1611 static int kvm_handle_debug(PowerPCCPU
*cpu
, struct kvm_run
*run
)
1613 CPUState
*cs
= CPU(cpu
);
1614 CPUPPCState
*env
= &cpu
->env
;
1615 struct kvm_debug_exit_arch
*arch_info
= &run
->debug
.arch
;
1620 if (cs
->singlestep_enabled
) {
1622 } else if (arch_info
->status
) {
1623 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1624 if (arch_info
->status
& KVMPPC_DEBUG_BREAKPOINT
) {
1625 n
= find_hw_breakpoint(arch_info
->address
, GDB_BREAKPOINT_HW
);
1629 } else if (arch_info
->status
& (KVMPPC_DEBUG_WATCH_READ
|
1630 KVMPPC_DEBUG_WATCH_WRITE
)) {
1631 n
= find_hw_watchpoint(arch_info
->address
, &flag
);
1634 cs
->watchpoint_hit
= &hw_watchpoint
;
1635 hw_watchpoint
.vaddr
= hw_debug_points
[n
].addr
;
1636 hw_watchpoint
.flags
= flag
;
1640 } else if (kvm_find_sw_breakpoint(cs
, arch_info
->address
)) {
1643 /* QEMU is not able to handle debug exception, so inject
1644 * program exception to guest;
1645 * Yes program exception NOT debug exception !!
1646 * When QEMU is using debug resources then debug exception must
1647 * be always set. To achieve this we set MSR_DE and also set
1648 * MSRP_DEP so guest cannot change MSR_DE.
1649 * When emulating debug resource for guest we want guest
1650 * to control MSR_DE (enable/disable debug interrupt on need).
1651 * Supporting both configurations are NOT possible.
1652 * So the result is that we cannot share debug resources
1653 * between QEMU and Guest on BOOKE architecture.
1654 * In the current design QEMU gets the priority over guest,
1655 * this means that if QEMU is using debug resources then guest
1657 * For software breakpoint QEMU uses a privileged instruction;
1658 * So there cannot be any reason that we are here for guest
1659 * set debug exception, only possibility is guest executed a
1660 * privileged / illegal instruction and that's why we are
1661 * injecting a program interrupt.
1664 cpu_synchronize_state(cs
);
1665 /* env->nip is PC, so increment this by 4 to use
1666 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1669 cs
->exception_index
= POWERPC_EXCP_PROGRAM
;
1670 env
->error_code
= POWERPC_EXCP_INVAL
;
1671 ppc_cpu_do_interrupt(cs
);
1677 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1679 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1680 CPUPPCState
*env
= &cpu
->env
;
1683 qemu_mutex_lock_iothread();
1685 switch (run
->exit_reason
) {
1687 if (run
->dcr
.is_write
) {
1688 DPRINTF("handle dcr write\n");
1689 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1691 DPRINTF("handle dcr read\n");
1692 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
1696 DPRINTF("handle halt\n");
1697 ret
= kvmppc_handle_halt(cpu
);
1699 #if defined(TARGET_PPC64)
1700 case KVM_EXIT_PAPR_HCALL
:
1701 DPRINTF("handle PAPR hypercall\n");
1702 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1704 run
->papr_hcall
.args
);
1709 DPRINTF("handle epr\n");
1710 run
->epr
.epr
= ldl_phys(cs
->as
, env
->mpic_iack
);
1713 case KVM_EXIT_WATCHDOG
:
1714 DPRINTF("handle watchdog expiry\n");
1715 watchdog_perform_action();
1719 case KVM_EXIT_DEBUG
:
1720 DPRINTF("handle debug exception\n");
1721 if (kvm_handle_debug(cpu
, run
)) {
1725 /* re-enter, this exception was guest-internal */
1730 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1735 qemu_mutex_unlock_iothread();
1739 int kvmppc_or_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1741 CPUState
*cs
= CPU(cpu
);
1742 uint32_t bits
= tsr_bits
;
1743 struct kvm_one_reg reg
= {
1744 .id
= KVM_REG_PPC_OR_TSR
,
1745 .addr
= (uintptr_t) &bits
,
1748 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1751 int kvmppc_clear_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1754 CPUState
*cs
= CPU(cpu
);
1755 uint32_t bits
= tsr_bits
;
1756 struct kvm_one_reg reg
= {
1757 .id
= KVM_REG_PPC_CLEAR_TSR
,
1758 .addr
= (uintptr_t) &bits
,
1761 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1764 int kvmppc_set_tcr(PowerPCCPU
*cpu
)
1766 CPUState
*cs
= CPU(cpu
);
1767 CPUPPCState
*env
= &cpu
->env
;
1768 uint32_t tcr
= env
->spr
[SPR_BOOKE_TCR
];
1770 struct kvm_one_reg reg
= {
1771 .id
= KVM_REG_PPC_TCR
,
1772 .addr
= (uintptr_t) &tcr
,
1775 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1778 int kvmppc_booke_watchdog_enable(PowerPCCPU
*cpu
)
1780 CPUState
*cs
= CPU(cpu
);
1783 if (!kvm_enabled()) {
1787 if (!cap_ppc_watchdog
) {
1788 printf("warning: KVM does not support watchdog");
1792 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_BOOKE_WATCHDOG
, 0);
1794 fprintf(stderr
, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1795 __func__
, strerror(-ret
));
1802 static int read_cpuinfo(const char *field
, char *value
, int len
)
1806 int field_len
= strlen(field
);
1809 f
= fopen("/proc/cpuinfo", "r");
1815 if (!fgets(line
, sizeof(line
), f
)) {
1818 if (!strncmp(line
, field
, field_len
)) {
1819 pstrcpy(value
, len
, line
);
1830 uint32_t kvmppc_get_tbfreq(void)
1834 uint32_t retval
= NANOSECONDS_PER_SECOND
;
1836 if (read_cpuinfo("timebase", line
, sizeof(line
))) {
1840 if (!(ns
= strchr(line
, ':'))) {
1849 bool kvmppc_get_host_serial(char **value
)
1851 return g_file_get_contents("/proc/device-tree/system-id", value
, NULL
,
1855 bool kvmppc_get_host_model(char **value
)
1857 return g_file_get_contents("/proc/device-tree/model", value
, NULL
, NULL
);
1860 /* Try to find a device tree node for a CPU with clock-frequency property */
1861 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1863 struct dirent
*dirp
;
1866 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1867 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1872 while ((dirp
= readdir(dp
)) != NULL
) {
1874 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1876 f
= fopen(buf
, "r");
1878 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
1885 if (buf
[0] == '\0') {
1886 printf("Unknown host!\n");
1893 static uint64_t kvmppc_read_int_dt(const char *filename
)
1902 f
= fopen(filename
, "rb");
1907 len
= fread(&u
, 1, sizeof(u
), f
);
1911 /* property is a 32-bit quantity */
1912 return be32_to_cpu(u
.v32
);
1914 return be64_to_cpu(u
.v64
);
1920 /* Read a CPU node property from the host device tree that's a single
1921 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1922 * (can't find or open the property, or doesn't understand the
1924 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
1926 char buf
[PATH_MAX
], *tmp
;
1929 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
1933 tmp
= g_strdup_printf("%s/%s", buf
, propname
);
1934 val
= kvmppc_read_int_dt(tmp
);
/* Host CPU clock frequency from the device tree, 0 on failure. */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
/* Host "ibm,vmx" device-tree property (Altivec/VSX level), 0 on failure. */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}
/* Host "ibm,dfp" device-tree property (decimal FP support), 0 on failure. */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
1955 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
1957 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
1958 CPUState
*cs
= CPU(cpu
);
1960 if (kvm_vm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
1961 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
1968 int kvmppc_get_hasidle(CPUPPCState
*env
)
1970 struct kvm_ppc_pvinfo pvinfo
;
1972 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
1973 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
1980 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
1982 uint32_t *hc
= (uint32_t*)buf
;
1983 struct kvm_ppc_pvinfo pvinfo
;
1985 if (!kvmppc_get_pvinfo(env
, &pvinfo
)) {
1986 memcpy(buf
, pvinfo
.hcall
, buf_len
);
1991 * Fallback to always fail hypercalls regardless of endianness:
1993 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1995 * b .+8 (becomes nop in wrong endian)
1996 * bswap32(li r3, -1)
1999 hc
[0] = cpu_to_be32(0x08000048);
2000 hc
[1] = cpu_to_be32(0x3860ffff);
2001 hc
[2] = cpu_to_be32(0x48000008);
2002 hc
[3] = cpu_to_be32(bswap32(0x3860ffff));
2007 static inline int kvmppc_enable_hcall(KVMState
*s
, target_ulong hcall
)
2009 return kvm_vm_enable_cap(s
, KVM_CAP_PPC_ENABLE_HCALL
, 0, hcall
, 1);
2012 void kvmppc_enable_logical_ci_hcalls(void)
2015 * FIXME: it would be nice if we could detect the cases where
2016 * we're using a device which requires the in kernel
2017 * implementation of these hcalls, but the kernel lacks them and
2018 * produce a warning.
2020 kvmppc_enable_hcall(kvm_state
, H_LOGICAL_CI_LOAD
);
2021 kvmppc_enable_hcall(kvm_state
, H_LOGICAL_CI_STORE
);
2024 void kvmppc_enable_set_mode_hcall(void)
2026 kvmppc_enable_hcall(kvm_state
, H_SET_MODE
);
2029 void kvmppc_set_papr(PowerPCCPU
*cpu
)
2031 CPUState
*cs
= CPU(cpu
);
2034 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_PAPR
, 0);
2036 error_report("This vCPU type or KVM version does not support PAPR");
2040 /* Update the capability flag so we sync the right information
2045 int kvmppc_set_compat(PowerPCCPU
*cpu
, uint32_t cpu_version
)
2047 return kvm_set_one_reg(CPU(cpu
), KVM_REG_PPC_ARCH_COMPAT
, &cpu_version
);
2050 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
2052 CPUState
*cs
= CPU(cpu
);
2055 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_EPR
, 0, mpic_proxy
);
2056 if (ret
&& mpic_proxy
) {
2057 error_report("This KVM version does not support EPR");
2062 int kvmppc_smt_threads(void)
2064 return cap_ppc_smt
? cap_ppc_smt
: 1;
2068 off_t
kvmppc_alloc_rma(void **rma
)
2072 struct kvm_allocate_rma ret
;
2074 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2075 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2076 * not necessary on this hardware
2077 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2079 * FIXME: We should allow the user to force contiguous RMA
2080 * allocation in the cap_ppc_rma==1 case.
2082 if (cap_ppc_rma
< 2) {
2086 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
2088 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2093 size
= MIN(ret
.rma_size
, 256ul << 20);
2095 *rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2096 if (*rma
== MAP_FAILED
) {
2097 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
2104 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
2106 struct kvm_ppc_smmu_info info
;
2107 long rampagesize
, best_page_shift
;
2110 if (cap_ppc_rma
>= 2) {
2111 return current_size
;
2114 /* Find the largest hardware supported page size that's less than
2115 * or equal to the (logical) backing page size of guest RAM */
2116 kvm_get_smmu_info(POWERPC_CPU(first_cpu
), &info
);
2117 rampagesize
= getrampagesize();
2118 best_page_shift
= 0;
2120 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
2121 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
2123 if (!sps
->page_shift
) {
2127 if ((sps
->page_shift
> best_page_shift
)
2128 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
2129 best_page_shift
= sps
->page_shift
;
2133 return MIN(current_size
,
2134 1ULL << (best_page_shift
+ hash_shift
- 7));
2138 bool kvmppc_spapr_use_multitce(void)
2140 return cap_spapr_multitce
;
2143 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
,
2146 struct kvm_create_spapr_tce args
= {
2148 .window_size
= window_size
,
2154 /* Must set fd to -1 so we don't try to munmap when called for
2155 * destroying the table, which the upper layers -will- do
2158 if (!cap_spapr_tce
|| (need_vfio
&& !cap_spapr_vfio
)) {
2162 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
2164 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
2169 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(uint64_t);
2170 /* FIXME: round this up to page size */
2172 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2173 if (table
== MAP_FAILED
) {
2174 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
2184 int kvmppc_remove_spapr_tce(void *table
, int fd
, uint32_t nb_table
)
2192 len
= nb_table
* sizeof(uint64_t);
2193 if ((munmap(table
, len
) < 0) ||
2195 fprintf(stderr
, "KVM: Unexpected error removing TCE table: %s",
2197 /* Leak the table */
2203 int kvmppc_reset_htab(int shift_hint
)
2205 uint32_t shift
= shift_hint
;
2207 if (!kvm_enabled()) {
2208 /* Full emulation, tell caller to allocate htab itself */
2211 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
2213 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
2214 if (ret
== -ENOTTY
) {
2215 /* At least some versions of PR KVM advertise the
2216 * capability, but don't implement the ioctl(). Oops.
2217 * Return 0 so that we allocate the htab in qemu, as is
2218 * correct for PR. */
2220 } else if (ret
< 0) {
2226 /* We have a kernel that predates the htab reset calls. For PR
2227 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2228 * this era, it has allocated a 16MB fixed size hash table
2229 * already. Kernels of this era have the GET_PVINFO capability
2230 * only on PR, so we use this hack to determine the right
2232 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
2233 /* PR - tell caller to allocate htab */
2236 /* HV - assume 16MB kernel allocated htab */
2241 static inline uint32_t mfpvr(void)
2250 static void alter_insns(uint64_t *word
, uint64_t flags
, bool on
)
2259 static void kvmppc_host_cpu_initfn(Object
*obj
)
2261 assert(kvm_enabled());
2264 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
2266 DeviceClass
*dc
= DEVICE_CLASS(oc
);
2267 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
2268 uint32_t vmx
= kvmppc_get_vmx();
2269 uint32_t dfp
= kvmppc_get_dfp();
2270 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
2271 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
2273 /* Now fix up the class with information we can query from the host */
2277 /* Only override when we know what the host supports */
2278 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
2279 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
2282 /* Only override when we know what the host supports */
2283 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
2286 if (dcache_size
!= -1) {
2287 pcc
->l1_dcache_size
= dcache_size
;
2290 if (icache_size
!= -1) {
2291 pcc
->l1_icache_size
= icache_size
;
2294 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2295 dc
->cannot_destroy_with_object_finalize_yet
= true;
2298 bool kvmppc_has_cap_epr(void)
2303 bool kvmppc_has_cap_htab_fd(void)
2308 bool kvmppc_has_cap_fixup_hcalls(void)
2310 return cap_fixup_hcalls
;
2313 static PowerPCCPUClass
*ppc_cpu_get_family_class(PowerPCCPUClass
*pcc
)
2315 ObjectClass
*oc
= OBJECT_CLASS(pcc
);
2317 while (oc
&& !object_class_is_abstract(oc
)) {
2318 oc
= object_class_get_parent(oc
);
2322 return POWERPC_CPU_CLASS(oc
);
2325 static int kvm_ppc_register_host_cpu_type(void)
2327 TypeInfo type_info
= {
2328 .name
= TYPE_HOST_POWERPC_CPU
,
2329 .instance_init
= kvmppc_host_cpu_initfn
,
2330 .class_init
= kvmppc_host_cpu_class_init
,
2332 uint32_t host_pvr
= mfpvr();
2333 PowerPCCPUClass
*pvr_pcc
;
2336 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
2337 if (pvr_pcc
== NULL
) {
2338 pvr_pcc
= ppc_cpu_class_by_pvr_mask(host_pvr
);
2340 if (pvr_pcc
== NULL
) {
2343 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2344 type_register(&type_info
);
2346 /* Register generic family CPU class for a family */
2347 pvr_pcc
= ppc_cpu_get_family_class(pvr_pcc
);
2348 dc
= DEVICE_CLASS(pvr_pcc
);
2349 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2350 type_info
.name
= g_strdup_printf("%s-"TYPE_POWERPC_CPU
, dc
->desc
);
2351 type_register(&type_info
);
2356 int kvmppc_define_rtas_kernel_token(uint32_t token
, const char *function
)
2358 struct kvm_rtas_token_args args
= {
2362 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_RTAS
)) {
2366 strncpy(args
.name
, function
, sizeof(args
.name
));
2368 return kvm_vm_ioctl(kvm_state
, KVM_PPC_RTAS_DEFINE_TOKEN
, &args
);
2371 int kvmppc_get_htab_fd(bool write
)
2373 struct kvm_get_htab_fd s
= {
2374 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
2379 fprintf(stderr
, "KVM version doesn't support saving the hash table\n");
2383 return kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
2386 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
2388 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
2389 uint8_t buf
[bufsize
];
2393 rc
= read(fd
, buf
, bufsize
);
2395 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
2399 uint8_t *buffer
= buf
;
2402 struct kvm_get_htab_header
*head
=
2403 (struct kvm_get_htab_header
*) buffer
;
2404 size_t chunksize
= sizeof(*head
) +
2405 HASH_PTE_SIZE_64
* head
->n_valid
;
2407 qemu_put_be32(f
, head
->index
);
2408 qemu_put_be16(f
, head
->n_valid
);
2409 qemu_put_be16(f
, head
->n_invalid
);
2410 qemu_put_buffer(f
, (void *)(head
+ 1),
2411 HASH_PTE_SIZE_64
* head
->n_valid
);
2413 buffer
+= chunksize
;
2419 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
2421 return (rc
== 0) ? 1 : 0;
2424 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
2425 uint16_t n_valid
, uint16_t n_invalid
)
2427 struct kvm_get_htab_header
*buf
;
2428 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
2431 buf
= alloca(chunksize
);
2433 buf
->n_valid
= n_valid
;
2434 buf
->n_invalid
= n_invalid
;
2436 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
2438 rc
= write(fd
, buf
, chunksize
);
2440 fprintf(stderr
, "Error writing KVM hash table: %s\n",
2444 if (rc
!= chunksize
) {
2445 /* We should never get a short write on a single chunk */
2446 fprintf(stderr
, "Short write, restoring KVM hash table\n");
2452 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
2457 int kvm_arch_on_sigbus_vcpu(CPUState
*cpu
, int code
, void *addr
)
2462 int kvm_arch_on_sigbus(int code
, void *addr
)
2467 void kvm_arch_init_irq_routing(KVMState
*s
)
2471 struct kvm_get_htab_buf
{
2472 struct kvm_get_htab_header header
;
2474 * We require one extra byte for read
2476 target_ulong hpte
[(HPTES_PER_GROUP
* 2) + 1];
2479 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU
*cpu
, target_ulong pte_index
)
2482 struct kvm_get_htab_fd ghf
;
2483 struct kvm_get_htab_buf
*hpte_buf
;
2486 ghf
.start_index
= pte_index
;
2487 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2492 hpte_buf
= g_malloc0(sizeof(*hpte_buf
));
2494 * Read the hpte group
2496 if (read(htab_fd
, hpte_buf
, sizeof(*hpte_buf
)) < 0) {
2501 return (uint64_t)(uintptr_t) hpte_buf
->hpte
;
2510 void kvmppc_hash64_free_pteg(uint64_t token
)
2512 struct kvm_get_htab_buf
*htab_buf
;
2514 htab_buf
= container_of((void *)(uintptr_t) token
, struct kvm_get_htab_buf
,
2520 void kvmppc_hash64_write_pte(CPUPPCState
*env
, target_ulong pte_index
,
2521 target_ulong pte0
, target_ulong pte1
)
2524 struct kvm_get_htab_fd ghf
;
2525 struct kvm_get_htab_buf hpte_buf
;
2528 ghf
.start_index
= 0; /* Ignored */
2529 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2534 hpte_buf
.header
.n_valid
= 1;
2535 hpte_buf
.header
.n_invalid
= 0;
2536 hpte_buf
.header
.index
= pte_index
;
2537 hpte_buf
.hpte
[0] = pte0
;
2538 hpte_buf
.hpte
[1] = pte1
;
2540 * Write the hpte entry.
2541 * CAUTION: write() has the warn_unused_result attribute. Hence we
2542 * need to check the return value, even though we do nothing.
2544 if (write(htab_fd
, &hpte_buf
, sizeof(hpte_buf
)) < 0) {
2556 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry
*route
,
2557 uint64_t address
, uint32_t data
, PCIDevice
*dev
)
/* Extract the GSI number from MSI data: the low 16 bits carry it. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}
2567 int kvmppc_enable_hwrng(void)
2569 if (!kvm_enabled() || !kvm_check_extension(kvm_state
, KVM_CAP_PPC_HWRNG
)) {
2573 return kvmppc_enable_hcall(kvm_state
, H_RANDOM
);