1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21
22 #include <linux/kvm.h>
23
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "cpu-models.h"
28 #include "qemu/timer.h"
29 #include "sysemu/sysemu.h"
30 #include "sysemu/hw_accel.h"
31 #include "sysemu/numa.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "sysemu/hostmem.h"
47 #include "qemu/cutils.h"
48 #if defined(TARGET_PPC64)
49 #include "hw/ppc/spapr_cpu_core.h"
50 #endif
51
52 //#define DEBUG_KVM
53
54 #ifdef DEBUG_KVM
55 #define DPRINTF(fmt, ...) \
56 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
57 #else
58 #define DPRINTF(fmt, ...) \
59 do { } while (0)
60 #endif
61
62 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
63
64 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
65 KVM_CAP_LAST_INFO
66 };
67
68 static int cap_interrupt_unset = false;
69 static int cap_interrupt_level = false;
70 static int cap_segstate;
71 static int cap_booke_sregs;
72 static int cap_ppc_smt;
73 static int cap_ppc_rma;
74 static int cap_spapr_tce;
75 static int cap_spapr_multitce;
76 static int cap_spapr_vfio;
77 static int cap_hior;
78 static int cap_one_reg;
79 static int cap_epr;
80 static int cap_ppc_watchdog;
81 static int cap_papr;
82 static int cap_htab_fd;
83 static int cap_fixup_hcalls;
84 static int cap_htm; /* Hardware transactional memory support */
85
86 static uint32_t debug_inst_opcode;
87
88 /* XXX We have a race condition where we actually have a level triggered
89 * interrupt, but the infrastructure can't expose that yet, so the guest
90 * takes but ignores it, goes to sleep and never gets notified that there's
91 * still an interrupt pending.
92 *
93 * As a quick workaround, let's just wake up again 20 ms after we injected
94 * an interrupt. That way we can ensure that we're always reinjecting
95 * interrupts in case the guest swallowed them.
96 */
97 static QEMUTimer *idle_timer;
98
99 static void kvm_kick_cpu(void *opaque)
100 {
101 PowerPCCPU *cpu = opaque;
102
103 qemu_cpu_kick(CPU(cpu));
104 }
105
106 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
107 * should only be used for fallback tests - generally we should use
108 * explicit capabilities for the features we want, rather than
109 * assuming what is/isn't available depending on the KVM variant. */
110 static bool kvmppc_is_pr(KVMState *ks)
111 {
112 /* Assume KVM-PR if the GET_PVINFO capability is available */
113 return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
114 }
115
116 static int kvm_ppc_register_host_cpu_type(void);
117
118 int kvm_arch_init(MachineState *ms, KVMState *s)
119 {
120 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
121 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
122 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
123 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
124 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
125 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
126 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
127 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
128 cap_spapr_vfio = false;
129 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
130 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
131 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
132 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
133 /* Note: we don't set cap_papr here, because this capability is
134 * only activated later, by kvmppc_set_papr() */
135 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
136 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
137 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
138
139 if (!cap_interrupt_level) {
140 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
141 "VM to stall at times!\n");
142 }
143
144 kvm_ppc_register_host_cpu_type();
145
146 return 0;
147 }
148
149 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
150 {
151 return 0;
152 }
153
154 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
155 {
156 CPUPPCState *cenv = &cpu->env;
157 CPUState *cs = CPU(cpu);
158 struct kvm_sregs sregs;
159 int ret;
160
161 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
162 /* What we're really trying to say is "if we're on BookE, we use
163 the native PVR for now". This is the only sane way to check
164 it though, so we may mislead users into thinking they can run
165 BookE guests on BookS. Let's hope nobody tries :) */
166 return 0;
167 } else {
168 if (!cap_segstate) {
169 fprintf(stderr, "kvm error: missing PVR setting capability\n");
170 return -ENOSYS;
171 }
172 }
173
174 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
175 if (ret) {
176 return ret;
177 }
178
179 sregs.pvr = cenv->spr[SPR_PVR];
180 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
181 }
182
183 /* Set up a shared TLB array with KVM */
184 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
185 {
186 CPUPPCState *env = &cpu->env;
187 CPUState *cs = CPU(cpu);
188 struct kvm_book3e_206_tlb_params params = {};
189 struct kvm_config_tlb cfg = {};
190 unsigned int entries = 0;
191 int ret, i;
192
193 if (!kvm_enabled() ||
194 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
195 return 0;
196 }
197
198 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
199
200 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
201 params.tlb_sizes[i] = booke206_tlb_size(env, i);
202 params.tlb_ways[i] = booke206_tlb_ways(env, i);
203 entries += params.tlb_sizes[i];
204 }
205
206 assert(entries == env->nb_tlb);
207 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
208
209 env->tlb_dirty = true;
210
211 cfg.array = (uintptr_t)env->tlb.tlbm;
212 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
213 cfg.params = (uintptr_t)&params;
214 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
215
216 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
217 if (ret < 0) {
218 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
219 __func__, strerror(-ret));
220 return ret;
221 }
222
223 env->kvm_sw_tlb = true;
224 return 0;
225 }
226
227
228 #if defined(TARGET_PPC64)
229 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
230 struct kvm_ppc_smmu_info *info)
231 {
232 CPUPPCState *env = &cpu->env;
233 CPUState *cs = CPU(cpu);
234
235 memset(info, 0, sizeof(*info));
236
237 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
238 * we need to "guess" what the supported page sizes are.
239 *
240 * For that to work we make a few assumptions:
241 *
242 * - Check whether we are running "PR" KVM which only supports 4K
243 * and 16M pages, but supports them regardless of the backing
244 * store characteristics. We also don't support 1T segments.
245 *
246 * This is safe as if HV KVM ever supports that capability or PR
247 * KVM grows support for more page/segment sizes, those versions
248 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
249 * will not hit this fallback
250 *
251 * - Else we are running HV KVM. This means we only support page
252 * sizes that fit in the backing store. Additionally we only
253 * advertise 64K pages if the processor is ARCH 2.06 and we assume
254 * P7 encodings for the SLB and hash table. Here too, we assume
255 * support for any newer processor will mean a kernel that
256 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
257 * this fallback.
258 */
259 if (kvmppc_is_pr(cs->kvm_state)) {
260 /* No flags */
261 info->flags = 0;
262 info->slb_size = 64;
263
264 /* Standard 4k base page size segment */
265 info->sps[0].page_shift = 12;
266 info->sps[0].slb_enc = 0;
267 info->sps[0].enc[0].page_shift = 12;
268 info->sps[0].enc[0].pte_enc = 0;
269
270 /* Standard 16M large page size segment */
271 info->sps[1].page_shift = 24;
272 info->sps[1].slb_enc = SLB_VSID_L;
273 info->sps[1].enc[0].page_shift = 24;
274 info->sps[1].enc[0].pte_enc = 0;
275 } else {
276 int i = 0;
277
278 /* HV KVM has backing store size restrictions */
279 info->flags = KVM_PPC_PAGE_SIZES_REAL;
280
281 if (env->mmu_model & POWERPC_MMU_1TSEG) {
282 info->flags |= KVM_PPC_1T_SEGMENTS;
283 }
284
285 if (env->mmu_model == POWERPC_MMU_2_06 ||
286 env->mmu_model == POWERPC_MMU_2_07) {
287 info->slb_size = 32;
288 } else {
289 info->slb_size = 64;
290 }
291
292 /* Standard 4k base page size segment */
293 info->sps[i].page_shift = 12;
294 info->sps[i].slb_enc = 0;
295 info->sps[i].enc[0].page_shift = 12;
296 info->sps[i].enc[0].pte_enc = 0;
297 i++;
298
299 /* 64K on MMU 2.06 and later */
300 if (env->mmu_model == POWERPC_MMU_2_06 ||
301 env->mmu_model == POWERPC_MMU_2_07) {
302 info->sps[i].page_shift = 16;
303 info->sps[i].slb_enc = 0x110;
304 info->sps[i].enc[0].page_shift = 16;
305 info->sps[i].enc[0].pte_enc = 1;
306 i++;
307 }
308
309 /* Standard 16M large page size segment */
310 info->sps[i].page_shift = 24;
311 info->sps[i].slb_enc = SLB_VSID_L;
312 info->sps[i].enc[0].page_shift = 24;
313 info->sps[i].enc[0].pte_enc = 0;
314 }
315 }
316
317 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
318 {
319 CPUState *cs = CPU(cpu);
320 int ret;
321
322 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
323 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
324 if (ret == 0) {
325 return;
326 }
327 }
328
329 kvm_get_fallback_smmu_info(cpu, info);
330 }
331
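/* Illustrative example for gethugepagesize() below: for a mem_path on a
 * hugetlbfs mount created with 16 MiB pages, statfs() reports
 * f_bsize == 0x1000000, which is returned directly; for a path on an
 * ordinary filesystem this falls back to getpagesize() (typically 4 KiB,
 * or 64 KiB on many ppc64 hosts). */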
332 static long gethugepagesize(const char *mem_path)
333 {
334 struct statfs fs;
335 int ret;
336
337 do {
338 ret = statfs(mem_path, &fs);
339 } while (ret != 0 && errno == EINTR);
340
341 if (ret != 0) {
342 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
343 strerror(errno));
344 exit(1);
345 }
346
347 #define HUGETLBFS_MAGIC 0x958458f6
348
349 if (fs.f_type != HUGETLBFS_MAGIC) {
350 /* Explicit mempath, but it's ordinary pages */
351 return getpagesize();
352 }
353
354 /* It's a hugepage, return the huge page size */
355 return fs.f_bsize;
356 }
357
358 /*
359 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
360 * may or may not name the same files / on the same filesystem now as
361 * when we actually open and map them. Iterate over the file
362 * descriptors instead, and use qemu_fd_getpagesize().
363 */
364 static int find_max_supported_pagesize(Object *obj, void *opaque)
365 {
366 char *mem_path;
367 long *hpsize_min = opaque;
368
369 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
370 mem_path = object_property_get_str(obj, "mem-path", NULL);
371 if (mem_path) {
372 long hpsize = gethugepagesize(mem_path);
373 if (hpsize < *hpsize_min) {
374 *hpsize_min = hpsize;
375 }
376 } else {
377 *hpsize_min = getpagesize();
378 }
379 }
380
381 return 0;
382 }
383
384 static long getrampagesize(void)
385 {
386 long hpsize = LONG_MAX;
387 long mainrampagesize;
388 Object *memdev_root;
389
390 if (mem_path) {
391 mainrampagesize = gethugepagesize(mem_path);
392 } else {
393 mainrampagesize = getpagesize();
394 }
395
396 /* it's possible we have memory-backend objects with
397 * hugepage-backed RAM. these may get mapped into system
398 * address space via -numa parameters or memory hotplug
399 * hooks. we want to take these into account, but we
400 * also want to make sure these supported hugepage
401 * sizes are applicable across the entire range of memory
402 * we may boot from, so we take the min across all
403 * backends, and assume normal pages in cases where a
404 * backend isn't backed by hugepages.
405 */
406 memdev_root = object_resolve_path("/objects", NULL);
407 if (memdev_root) {
408 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
409 }
410 if (hpsize == LONG_MAX) {
411 /* No additional memory regions found ==> Report main RAM page size */
412 return mainrampagesize;
413 }
414
415 /* If NUMA is disabled or the NUMA nodes are not backed with a
416 * memory-backend, then there is at least one node using "normal" RAM,
417 * so if its page size is smaller we have got to report that size instead.
418 */
419 if (hpsize > mainrampagesize &&
420 (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
421 static bool warned;
422 if (!warned) {
423 error_report("Huge page support disabled (n/a for main memory).");
424 warned = true;
425 }
426 return mainrampagesize;
427 }
428
429 return hpsize;
430 }
431
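/* Worked example for kvm_valid_page_size() below (hypothetical numbers):
 * with KVM_PPC_PAGE_SIZES_REAL set and RAM backed by 64 KiB pages
 * (rampgsize == 65536), a 64 KiB page size (shift 16) is accepted since
 * 1 << 16 <= 65536, while a 16 MiB page size (shift 24) is rejected.
 * Without the flag every size is accepted. */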
432 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
433 {
434 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
435 return true;
436 }
437
438 return (1ul << shift) <= rampgsize;
439 }
440
441 static long max_cpu_page_size;
442
443 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
444 {
445 static struct kvm_ppc_smmu_info smmu_info;
446 static bool has_smmu_info;
447 CPUPPCState *env = &cpu->env;
448 int iq, ik, jq, jk;
449 bool has_64k_pages = false;
450
451 /* We only handle page sizes for 64-bit server guests for now */
452 if (!(env->mmu_model & POWERPC_MMU_64)) {
453 return;
454 }
455
456 /* Collect MMU info from kernel if not already */
457 if (!has_smmu_info) {
458 kvm_get_smmu_info(cpu, &smmu_info);
459 has_smmu_info = true;
460 }
461
462 if (!max_cpu_page_size) {
463 max_cpu_page_size = getrampagesize();
464 }
465
466 /* Convert to QEMU form */
467 memset(&env->sps, 0, sizeof(env->sps));
468
469 /* If we have HV KVM, we need to forbid CI large pages if our
470 * host page size is smaller than 64K.
471 */
472 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
473 env->ci_large_pages = getpagesize() >= 0x10000;
474 }
475
476 /*
477 * XXX This loop should be an entry wide AND of the capabilities that
478 * the selected CPU has with the capabilities that KVM supports.
479 */
480 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
481 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
482 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
483
484 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
485 ksps->page_shift)) {
486 continue;
487 }
488 qsps->page_shift = ksps->page_shift;
489 qsps->slb_enc = ksps->slb_enc;
490 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
491 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
492 ksps->enc[jk].page_shift)) {
493 continue;
494 }
495 if (ksps->enc[jk].page_shift == 16) {
496 has_64k_pages = true;
497 }
498 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
499 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
500 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
501 break;
502 }
503 }
504 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
505 break;
506 }
507 }
508 env->slb_nr = smmu_info.slb_size;
509 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
510 env->mmu_model &= ~POWERPC_MMU_1TSEG;
511 }
512 if (!has_64k_pages) {
513 env->mmu_model &= ~POWERPC_MMU_64K;
514 }
515 }
516
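/* Check that the page size backing this memory backend is at least
 * max_cpu_page_size, the RAM page size used above when advertising page
 * sizes to the guest; smaller backing pages would break page sizes the
 * guest has already been told about. */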
517 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
518 {
519 Object *mem_obj = object_resolve_path(obj_path, NULL);
520 char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
521 long pagesize;
522
523 if (mempath) {
524 pagesize = gethugepagesize(mempath);
525 } else {
526 pagesize = getpagesize();
527 }
528
529 return pagesize >= max_cpu_page_size;
530 }
531
532 #else /* defined (TARGET_PPC64) */
533
534 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
535 {
536 }
537
538 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
539 {
540 return true;
541 }
542
543 #endif /* !defined (TARGET_PPC64) */
544
545 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
546 {
547 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
548 }
549
550 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
551 * book3s supports only 1 watchpoint, so an array size
552 * of 4 is sufficient for now.
553 */
554 #define MAX_HW_BKPTS 4
555
556 static struct HWBreakpoint {
557 target_ulong addr;
558 int type;
559 } hw_debug_points[MAX_HW_BKPTS];
560
561 static CPUWatchpoint hw_watchpoint;
562
563 /* By default there are no breakpoints or watchpoints supported */
564 static int max_hw_breakpoint;
565 static int max_hw_watchpoint;
566 static int nb_hw_breakpoint;
567 static int nb_hw_watchpoint;
568
569 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
570 {
571 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
572 max_hw_breakpoint = 2;
573 max_hw_watchpoint = 2;
574 }
575
576 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
577 fprintf(stderr, "Error initializing h/w breakpoints\n");
578 return;
579 }
580 }
581
582 int kvm_arch_init_vcpu(CPUState *cs)
583 {
584 PowerPCCPU *cpu = POWERPC_CPU(cs);
585 CPUPPCState *cenv = &cpu->env;
586 int ret;
587
588 /* Gather server mmu info from KVM and update the CPU state */
589 kvm_fixup_page_sizes(cpu);
590
591 /* Synchronize sregs with kvm */
592 ret = kvm_arch_sync_sregs(cpu);
593 if (ret) {
594 if (ret == -EINVAL) {
595 error_report("Register sync failed... If you're using kvm-hv.ko,"
596 " only \"-cpu host\" is possible");
597 }
598 return ret;
599 }
600
601 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
602
603 switch (cenv->mmu_model) {
604 case POWERPC_MMU_BOOKE206:
605 /* This target supports access to KVM's guest TLB */
606 ret = kvm_booke206_tlb_init(cpu);
607 break;
608 case POWERPC_MMU_2_07:
609 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
610 /* KVM-HV has transactional memory on POWER8 even without the
611 * KVM_CAP_PPC_HTM extension, so enable it here instead. */
612 cap_htm = true;
613 }
614 break;
615 default:
616 break;
617 }
618
619 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
620 kvmppc_hw_debug_points_init(cenv);
621
622 return ret;
623 }
624
625 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
626 {
627 CPUPPCState *env = &cpu->env;
628 CPUState *cs = CPU(cpu);
629 struct kvm_dirty_tlb dirty_tlb;
630 unsigned char *bitmap;
631 int ret;
632
633 if (!env->kvm_sw_tlb) {
634 return;
635 }
636
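    /* Flag every entry dirty (all bitmap bits set) so KVM resynchronizes
     * the entire shared TLB array from userspace. */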
637 bitmap = g_malloc((env->nb_tlb + 7) / 8);
638 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
639
640 dirty_tlb.bitmap = (uintptr_t)bitmap;
641 dirty_tlb.num_dirty = env->nb_tlb;
642
643 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
644 if (ret) {
645 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
646 __func__, strerror(-ret));
647 }
648
649 g_free(bitmap);
650 }
651
652 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
653 {
654 PowerPCCPU *cpu = POWERPC_CPU(cs);
655 CPUPPCState *env = &cpu->env;
656 union {
657 uint32_t u32;
658 uint64_t u64;
659 } val;
660 struct kvm_one_reg reg = {
661 .id = id,
662 .addr = (uintptr_t) &val,
663 };
664 int ret;
665
666 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
667 if (ret != 0) {
668 trace_kvm_failed_spr_get(spr, strerror(errno));
669 } else {
670 switch (id & KVM_REG_SIZE_MASK) {
671 case KVM_REG_SIZE_U32:
672 env->spr[spr] = val.u32;
673 break;
674
675 case KVM_REG_SIZE_U64:
676 env->spr[spr] = val.u64;
677 break;
678
679 default:
680 /* Don't handle this size yet */
681 abort();
682 }
683 }
684 }
685
686 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
687 {
688 PowerPCCPU *cpu = POWERPC_CPU(cs);
689 CPUPPCState *env = &cpu->env;
690 union {
691 uint32_t u32;
692 uint64_t u64;
693 } val;
694 struct kvm_one_reg reg = {
695 .id = id,
696 .addr = (uintptr_t) &val,
697 };
698 int ret;
699
700 switch (id & KVM_REG_SIZE_MASK) {
701 case KVM_REG_SIZE_U32:
702 val.u32 = env->spr[spr];
703 break;
704
705 case KVM_REG_SIZE_U64:
706 val.u64 = env->spr[spr];
707 break;
708
709 default:
710 /* Don't handle this size yet */
711 abort();
712 }
713
714 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
715 if (ret != 0) {
716 trace_kvm_failed_spr_set(spr, strerror(errno));
717 }
718 }
719
720 static int kvm_put_fp(CPUState *cs)
721 {
722 PowerPCCPU *cpu = POWERPC_CPU(cs);
723 CPUPPCState *env = &cpu->env;
724 struct kvm_one_reg reg;
725 int i;
726 int ret;
727
728 if (env->insns_flags & PPC_FLOAT) {
729 uint64_t fpscr = env->fpscr;
730 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
731
732 reg.id = KVM_REG_PPC_FPSCR;
733 reg.addr = (uintptr_t)&fpscr;
734 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
735 if (ret < 0) {
736 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
737 return ret;
738 }
739
740 for (i = 0; i < 32; i++) {
741 uint64_t vsr[2];
742
743 #ifdef HOST_WORDS_BIGENDIAN
744 vsr[0] = float64_val(env->fpr[i]);
745 vsr[1] = env->vsr[i];
746 #else
747 vsr[0] = env->vsr[i];
748 vsr[1] = float64_val(env->fpr[i]);
749 #endif
750 reg.addr = (uintptr_t) &vsr;
751 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
752
753 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
754 if (ret < 0) {
755 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
756 i, strerror(errno));
757 return ret;
758 }
759 }
760 }
761
762 if (env->insns_flags & PPC_ALTIVEC) {
763 reg.id = KVM_REG_PPC_VSCR;
764 reg.addr = (uintptr_t)&env->vscr;
765 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
766 if (ret < 0) {
767 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
768 return ret;
769 }
770
771 for (i = 0; i < 32; i++) {
772 reg.id = KVM_REG_PPC_VR(i);
773 reg.addr = (uintptr_t)&env->avr[i];
774 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
775 if (ret < 0) {
776 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
777 return ret;
778 }
779 }
780 }
781
782 return 0;
783 }
784
785 static int kvm_get_fp(CPUState *cs)
786 {
787 PowerPCCPU *cpu = POWERPC_CPU(cs);
788 CPUPPCState *env = &cpu->env;
789 struct kvm_one_reg reg;
790 int i;
791 int ret;
792
793 if (env->insns_flags & PPC_FLOAT) {
794 uint64_t fpscr;
795 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
796
797 reg.id = KVM_REG_PPC_FPSCR;
798 reg.addr = (uintptr_t)&fpscr;
799 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
800 if (ret < 0) {
801 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
802 return ret;
803 } else {
804 env->fpscr = fpscr;
805 }
806
807 for (i = 0; i < 32; i++) {
808 uint64_t vsr[2];
809
810 reg.addr = (uintptr_t) &vsr;
811 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
812
813 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
814 if (ret < 0) {
815 DPRINTF("Unable to get %s%d from KVM: %s\n",
816 vsx ? "VSR" : "FPR", i, strerror(errno));
817 return ret;
818 } else {
819 #ifdef HOST_WORDS_BIGENDIAN
820 env->fpr[i] = vsr[0];
821 if (vsx) {
822 env->vsr[i] = vsr[1];
823 }
824 #else
825 env->fpr[i] = vsr[1];
826 if (vsx) {
827 env->vsr[i] = vsr[0];
828 }
829 #endif
830 }
831 }
832 }
833
834 if (env->insns_flags & PPC_ALTIVEC) {
835 reg.id = KVM_REG_PPC_VSCR;
836 reg.addr = (uintptr_t)&env->vscr;
837 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
838 if (ret < 0) {
839 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
840 return ret;
841 }
842
843 for (i = 0; i < 32; i++) {
844 reg.id = KVM_REG_PPC_VR(i);
845 reg.addr = (uintptr_t)&env->avr[i];
846 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
847 if (ret < 0) {
848 DPRINTF("Unable to get VR%d from KVM: %s\n",
849 i, strerror(errno));
850 return ret;
851 }
852 }
853 }
854
855 return 0;
856 }
857
858 #if defined(TARGET_PPC64)
859 static int kvm_get_vpa(CPUState *cs)
860 {
861 PowerPCCPU *cpu = POWERPC_CPU(cs);
862 CPUPPCState *env = &cpu->env;
863 struct kvm_one_reg reg;
864 int ret;
865
866 reg.id = KVM_REG_PPC_VPA_ADDR;
867 reg.addr = (uintptr_t)&env->vpa_addr;
868 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
869 if (ret < 0) {
870 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
871 return ret;
872 }
873
874 assert((uintptr_t)&env->slb_shadow_size
875 == ((uintptr_t)&env->slb_shadow_addr + 8));
876 reg.id = KVM_REG_PPC_VPA_SLB;
877 reg.addr = (uintptr_t)&env->slb_shadow_addr;
878 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
879 if (ret < 0) {
880 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
881 strerror(errno));
882 return ret;
883 }
884
885 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
886 reg.id = KVM_REG_PPC_VPA_DTL;
887 reg.addr = (uintptr_t)&env->dtl_addr;
888 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
889 if (ret < 0) {
890 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
891 strerror(errno));
892 return ret;
893 }
894
895 return 0;
896 }
897
898 static int kvm_put_vpa(CPUState *cs)
899 {
900 PowerPCCPU *cpu = POWERPC_CPU(cs);
901 CPUPPCState *env = &cpu->env;
902 struct kvm_one_reg reg;
903 int ret;
904
905 /* SLB shadow or DTL can't be registered unless a master VPA is
906 * registered. That means when restoring state, if a VPA *is*
907 * registered, we need to set that up first. If not, we need to
908 * deregister the others before deregistering the master VPA */
909 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
910
911 if (env->vpa_addr) {
912 reg.id = KVM_REG_PPC_VPA_ADDR;
913 reg.addr = (uintptr_t)&env->vpa_addr;
914 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
915 if (ret < 0) {
916 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
917 return ret;
918 }
919 }
920
921 assert((uintptr_t)&env->slb_shadow_size
922 == ((uintptr_t)&env->slb_shadow_addr + 8));
923 reg.id = KVM_REG_PPC_VPA_SLB;
924 reg.addr = (uintptr_t)&env->slb_shadow_addr;
925 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
926 if (ret < 0) {
927 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
928 return ret;
929 }
930
931 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
932 reg.id = KVM_REG_PPC_VPA_DTL;
933 reg.addr = (uintptr_t)&env->dtl_addr;
934 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
935 if (ret < 0) {
936 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
937 strerror(errno));
938 return ret;
939 }
940
941 if (!env->vpa_addr) {
942 reg.id = KVM_REG_PPC_VPA_ADDR;
943 reg.addr = (uintptr_t)&env->vpa_addr;
944 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
945 if (ret < 0) {
946 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
947 return ret;
948 }
949 }
950
951 return 0;
952 }
953 #endif /* TARGET_PPC64 */
954
955 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
956 {
957 CPUPPCState *env = &cpu->env;
958 struct kvm_sregs sregs;
959 int i;
960
961 sregs.pvr = env->spr[SPR_PVR];
962
963 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
964
965 /* Sync SLB */
966 #ifdef TARGET_PPC64
967 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
968 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
969 if (env->slb[i].esid & SLB_ESID_V) {
970 sregs.u.s.ppc64.slb[i].slbe |= i;
971 }
972 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
973 }
974 #endif
975
976 /* Sync SRs */
977 for (i = 0; i < 16; i++) {
978 sregs.u.s.ppc32.sr[i] = env->sr[i];
979 }
980
981 /* Sync BATs */
982 for (i = 0; i < 8; i++) {
983 /* Beware. We have to swap upper and lower bits here */
984 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
985 | env->DBAT[1][i];
986 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
987 | env->IBAT[1][i];
988 }
989
990 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
991 }
992
993 int kvm_arch_put_registers(CPUState *cs, int level)
994 {
995 PowerPCCPU *cpu = POWERPC_CPU(cs);
996 CPUPPCState *env = &cpu->env;
997 struct kvm_regs regs;
998 int ret;
999 int i;
1000
1001 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1002 if (ret < 0) {
1003 return ret;
1004 }
1005
1006 regs.ctr = env->ctr;
1007 regs.lr = env->lr;
1008 regs.xer = cpu_read_xer(env);
1009 regs.msr = env->msr;
1010 regs.pc = env->nip;
1011
1012 regs.srr0 = env->spr[SPR_SRR0];
1013 regs.srr1 = env->spr[SPR_SRR1];
1014
1015 regs.sprg0 = env->spr[SPR_SPRG0];
1016 regs.sprg1 = env->spr[SPR_SPRG1];
1017 regs.sprg2 = env->spr[SPR_SPRG2];
1018 regs.sprg3 = env->spr[SPR_SPRG3];
1019 regs.sprg4 = env->spr[SPR_SPRG4];
1020 regs.sprg5 = env->spr[SPR_SPRG5];
1021 regs.sprg6 = env->spr[SPR_SPRG6];
1022 regs.sprg7 = env->spr[SPR_SPRG7];
1023
1024 regs.pid = env->spr[SPR_BOOKE_PID];
1025
1026 for (i = 0; i < 32; i++)
1027 regs.gpr[i] = env->gpr[i];
1028
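    /* Pack the eight 4-bit condition register fields into the single
     * 32-bit image KVM expects, with CR0 in the most significant nibble. */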
1029 regs.cr = 0;
1030 for (i = 0; i < 8; i++) {
1031 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1032 }
1033
1034 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1035 if (ret < 0)
1036 return ret;
1037
1038 kvm_put_fp(cs);
1039
1040 if (env->tlb_dirty) {
1041 kvm_sw_tlb_put(cpu);
1042 env->tlb_dirty = false;
1043 }
1044
1045 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1046 ret = kvmppc_put_books_sregs(cpu);
1047 if (ret < 0) {
1048 return ret;
1049 }
1050 }
1051
1052 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1053 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1054 }
1055
1056 if (cap_one_reg) {
1057 int i;
1058
1059 /* We deliberately ignore errors here; for kernels which have
1060 * the ONE_REG calls but don't support the specific
1061 * registers, there's a reasonable chance things will still
1062 * work, at least until we try to migrate. */
1063 for (i = 0; i < 1024; i++) {
1064 uint64_t id = env->spr_cb[i].one_reg_id;
1065
1066 if (id != 0) {
1067 kvm_put_one_spr(cs, id, i);
1068 }
1069 }
1070
1071 #ifdef TARGET_PPC64
1072 if (msr_ts) {
1073 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1074 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1075 }
1076 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1077 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1078 }
1079 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1080 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1081 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1082 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1083 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1084 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1085 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1086 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1087 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1088 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1089 }
1090
1091 if (cap_papr) {
1092 if (kvm_put_vpa(cs) < 0) {
1093 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1094 }
1095 }
1096
1097 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1098 #endif /* TARGET_PPC64 */
1099 }
1100
1101 return ret;
1102 }
1103
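/* kvm_sync_excp(): recompute a BookE exception vector as IVPR + IVORn.
 * Illustrative example (hypothetical values): IVPR = 0xfff00000 with
 * IVOR2 = 0x300 yields a DSI vector of 0xfff00300. */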
1104 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1105 {
1106 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1107 }
1108
1109 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1110 {
1111 CPUPPCState *env = &cpu->env;
1112 struct kvm_sregs sregs;
1113 int ret;
1114
1115 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1116 if (ret < 0) {
1117 return ret;
1118 }
1119
1120 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1121 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1122 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1123 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1124 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1125 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1126 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1127 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1128 env->spr[SPR_DECR] = sregs.u.e.dec;
1129 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1130 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1131 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1132 }
1133
1134 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1135 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1136 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1137 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1138 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1139 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1140 }
1141
1142 if (sregs.u.e.features & KVM_SREGS_E_64) {
1143 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1144 }
1145
1146 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1147 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1148 }
1149
1150 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1151 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1152 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1153 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1154 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1155 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1156 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1157 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1158 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1159 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1160 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1161 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1162 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1163 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1164 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1165 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1166 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1167 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1168 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1169 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1170 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1171 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1172 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1173 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1174 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1175 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1176 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1177 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1178 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1179 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1180 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1181 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1182 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1183
1184 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1185 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1186 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1187 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1188 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1189 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1190 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1191 }
1192
1193 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1194 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1195 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1196 }
1197
1198 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1199 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1200 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1201 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1202 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1203 }
1204 }
1205
1206 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1207 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1208 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1209 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1210 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1211 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1212 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1213 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1214 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1215 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1216 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1217 }
1218
1219 if (sregs.u.e.features & KVM_SREGS_EXP) {
1220 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1221 }
1222
1223 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1224 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1225 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1226 }
1227
1228 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1229 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1230 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1231 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1232
1233 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1234 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1235 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1236 }
1237 }
1238
1239 return 0;
1240 }
1241
1242 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1243 {
1244 CPUPPCState *env = &cpu->env;
1245 struct kvm_sregs sregs;
1246 int ret;
1247 int i;
1248
1249 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1250 if (ret < 0) {
1251 return ret;
1252 }
1253
1254 if (!cpu->vhyp) {
1255 ppc_store_sdr1(env, sregs.u.s.sdr1);
1256 }
1257
1258 /* Sync SLB */
1259 #ifdef TARGET_PPC64
1260 /*
1261 * The packed SLB array we get from KVM_GET_SREGS only contains
1262 * information about valid entries. So we flush our internal copy
1263 * to get rid of stale ones, then put all valid SLB entries back
1264 * in.
1265 */
1266 memset(env->slb, 0, sizeof(env->slb));
1267 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1268 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1269 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1270 /*
1271 * Only restore valid entries
1272 */
1273 if (rb & SLB_ESID_V) {
1274 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1275 }
1276 }
1277 #endif
1278
1279 /* Sync SRs */
1280 for (i = 0; i < 16; i++) {
1281 env->sr[i] = sregs.u.s.ppc32.sr[i];
1282 }
1283
1284 /* Sync BATs */
1285 for (i = 0; i < 8; i++) {
1286 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1287 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1288 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1289 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1290 }
1291
1292 return 0;
1293 }
1294
1295 int kvm_arch_get_registers(CPUState *cs)
1296 {
1297 PowerPCCPU *cpu = POWERPC_CPU(cs);
1298 CPUPPCState *env = &cpu->env;
1299 struct kvm_regs regs;
1300 uint32_t cr;
1301 int i, ret;
1302
1303 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1304 if (ret < 0)
1305 return ret;
1306
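    /* Unpack the 32-bit CR image into the eight 4-bit crf[] fields;
     * the loop peels CR7 off the least significant nibble first. */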
1307 cr = regs.cr;
1308 for (i = 7; i >= 0; i--) {
1309 env->crf[i] = cr & 15;
1310 cr >>= 4;
1311 }
1312
1313 env->ctr = regs.ctr;
1314 env->lr = regs.lr;
1315 cpu_write_xer(env, regs.xer);
1316 env->msr = regs.msr;
1317 env->nip = regs.pc;
1318
1319 env->spr[SPR_SRR0] = regs.srr0;
1320 env->spr[SPR_SRR1] = regs.srr1;
1321
1322 env->spr[SPR_SPRG0] = regs.sprg0;
1323 env->spr[SPR_SPRG1] = regs.sprg1;
1324 env->spr[SPR_SPRG2] = regs.sprg2;
1325 env->spr[SPR_SPRG3] = regs.sprg3;
1326 env->spr[SPR_SPRG4] = regs.sprg4;
1327 env->spr[SPR_SPRG5] = regs.sprg5;
1328 env->spr[SPR_SPRG6] = regs.sprg6;
1329 env->spr[SPR_SPRG7] = regs.sprg7;
1330
1331 env->spr[SPR_BOOKE_PID] = regs.pid;
1332
1333 for (i = 0; i < 32; i++)
1334 env->gpr[i] = regs.gpr[i];
1335
1336 kvm_get_fp(cs);
1337
1338 if (cap_booke_sregs) {
1339 ret = kvmppc_get_booke_sregs(cpu);
1340 if (ret < 0) {
1341 return ret;
1342 }
1343 }
1344
1345 if (cap_segstate) {
1346 ret = kvmppc_get_books_sregs(cpu);
1347 if (ret < 0) {
1348 return ret;
1349 }
1350 }
1351
1352 if (cap_hior) {
1353 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1354 }
1355
1356 if (cap_one_reg) {
1357 int i;
1358
1359 /* We deliberately ignore errors here; for kernels which have
1360 * the ONE_REG calls but don't support the specific
1361 * registers, there's a reasonable chance things will still
1362 * work, at least until we try to migrate. */
1363 for (i = 0; i < 1024; i++) {
1364 uint64_t id = env->spr_cb[i].one_reg_id;
1365
1366 if (id != 0) {
1367 kvm_get_one_spr(cs, id, i);
1368 }
1369 }
1370
1371 #ifdef TARGET_PPC64
1372 if (msr_ts) {
1373 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1374 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1375 }
1376 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1377 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1378 }
1379 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1380 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1381 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1382 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1383 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1384 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1385 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1386 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1387 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1388 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1389 }
1390
1391 if (cap_papr) {
1392 if (kvm_get_vpa(cs) < 0) {
1393 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1394 }
1395 }
1396
1397 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1398 #endif
1399 }
1400
1401 return 0;
1402 }
1403
1404 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1405 {
1406 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1407
1408 if (irq != PPC_INTERRUPT_EXT) {
1409 return 0;
1410 }
1411
1412 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1413 return 0;
1414 }
1415
1416 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1417
1418 return 0;
1419 }
1420
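/* Pick the core-family-specific external interrupt input pin tested below
 * when deciding whether to inject an interrupt through the legacy
 * (non-level) KVM interface. */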
1421 #if defined(TARGET_PPCEMB)
1422 #define PPC_INPUT_INT PPC40x_INPUT_INT
1423 #elif defined(TARGET_PPC64)
1424 #define PPC_INPUT_INT PPC970_INPUT_INT
1425 #else
1426 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1427 #endif
1428
1429 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1430 {
1431 PowerPCCPU *cpu = POWERPC_CPU(cs);
1432 CPUPPCState *env = &cpu->env;
1433 int r;
1434 unsigned irq;
1435
1436 qemu_mutex_lock_iothread();
1437
1438 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1439 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1440 if (!cap_interrupt_level &&
1441 run->ready_for_interrupt_injection &&
1442 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1443 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1444 {
1445 /* For now KVM disregards the 'irq' argument. However, in the
1446 * future KVM could cache it in-kernel to avoid a heavyweight exit
1447 * when reading the UIC.
1448 */
1449 irq = KVM_INTERRUPT_SET;
1450
1451 DPRINTF("injected interrupt %d\n", irq);
1452 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1453 if (r < 0) {
1454 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1455 }
1456
1457 /* Always wake up soon in case the interrupt was level based */
1458 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1459 (NANOSECONDS_PER_SECOND / 50));
1460 }
1461
1462 /* We don't know if there are more interrupts pending after this. However,
1463 * the guest will return to userspace in the course of handling this one
1464 * anyway, so we will get a chance to deliver the rest. */
1465
1466 qemu_mutex_unlock_iothread();
1467 }
1468
1469 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1470 {
1471 return MEMTXATTRS_UNSPECIFIED;
1472 }
1473
1474 int kvm_arch_process_async_events(CPUState *cs)
1475 {
1476 return cs->halted;
1477 }
1478
1479 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1480 {
1481 CPUState *cs = CPU(cpu);
1482 CPUPPCState *env = &cpu->env;
1483
1484 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1485 cs->halted = 1;
1486 cs->exception_index = EXCP_HLT;
1487 }
1488
1489 return 0;
1490 }
1491
1492 /* map dcr access to existing qemu dcr emulation */
1493 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1494 {
1495 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1496 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1497
1498 return 0;
1499 }
1500
1501 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1502 {
1503 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1504 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1505
1506 return 0;
1507 }
1508
1509 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1510 {
1511 /* Mixed endian case is not handled */
1512 uint32_t sc = debug_inst_opcode;
1513
1514 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1515 sizeof(sc), 0) ||
1516 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1517 return -EINVAL;
1518 }
1519
1520 return 0;
1521 }
1522
1523 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1524 {
1525 uint32_t sc;
1526
1527 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1528 sc != debug_inst_opcode ||
1529 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1530 sizeof(sc), 1)) {
1531 return -EINVAL;
1532 }
1533
1534 return 0;
1535 }
1536
1537 static int find_hw_breakpoint(target_ulong addr, int type)
1538 {
1539 int n;
1540
1541 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1542 <= ARRAY_SIZE(hw_debug_points));
1543
1544 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1545 if (hw_debug_points[n].addr == addr &&
1546 hw_debug_points[n].type == type) {
1547 return n;
1548 }
1549 }
1550
1551 return -1;
1552 }
1553
1554 static int find_hw_watchpoint(target_ulong addr, int *flag)
1555 {
1556 int n;
1557
1558 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1559 if (n >= 0) {
1560 *flag = BP_MEM_ACCESS;
1561 return n;
1562 }
1563
1564 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1565 if (n >= 0) {
1566 *flag = BP_MEM_WRITE;
1567 return n;
1568 }
1569
1570 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1571 if (n >= 0) {
1572 *flag = BP_MEM_READ;
1573 return n;
1574 }
1575
1576 return -1;
1577 }
1578
1579 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1580 target_ulong len, int type)
1581 {
1582 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1583 return -ENOBUFS;
1584 }
1585
1586 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1587 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1588
1589 switch (type) {
1590 case GDB_BREAKPOINT_HW:
1591 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1592 return -ENOBUFS;
1593 }
1594
1595 if (find_hw_breakpoint(addr, type) >= 0) {
1596 return -EEXIST;
1597 }
1598
1599 nb_hw_breakpoint++;
1600 break;
1601
1602 case GDB_WATCHPOINT_WRITE:
1603 case GDB_WATCHPOINT_READ:
1604 case GDB_WATCHPOINT_ACCESS:
1605 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1606 return -ENOBUFS;
1607 }
1608
1609 if (find_hw_breakpoint(addr, type) >= 0) {
1610 return -EEXIST;
1611 }
1612
1613 nb_hw_watchpoint++;
1614 break;
1615
1616 default:
1617 return -ENOSYS;
1618 }
1619
1620 return 0;
1621 }
1622
1623 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1624 target_ulong len, int type)
1625 {
1626 int n;
1627
1628 n = find_hw_breakpoint(addr, type);
1629 if (n < 0) {
1630 return -ENOENT;
1631 }
1632
1633 switch (type) {
1634 case GDB_BREAKPOINT_HW:
1635 nb_hw_breakpoint--;
1636 break;
1637
1638 case GDB_WATCHPOINT_WRITE:
1639 case GDB_WATCHPOINT_READ:
1640 case GDB_WATCHPOINT_ACCESS:
1641 nb_hw_watchpoint--;
1642 break;
1643
1644 default:
1645 return -ENOSYS;
1646 }
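    /* Compact the array: the entry that was at the old end is moved into
     * the slot freed by the removed breakpoint/watchpoint. */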
1647 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1648
1649 return 0;
1650 }
1651
1652 void kvm_arch_remove_all_hw_breakpoints(void)
1653 {
1654 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1655 }
1656
1657 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1658 {
1659 int n;
1660
1661 /* Software Breakpoint updates */
1662 if (kvm_sw_breakpoints_active(cs)) {
1663 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1664 }
1665
1666 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1667 <= ARRAY_SIZE(hw_debug_points));
1668 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1669
1670 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1671 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1672 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1673 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1674 switch (hw_debug_points[n].type) {
1675 case GDB_BREAKPOINT_HW:
1676 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1677 break;
1678 case GDB_WATCHPOINT_WRITE:
1679 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1680 break;
1681 case GDB_WATCHPOINT_READ:
1682 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1683 break;
1684 case GDB_WATCHPOINT_ACCESS:
1685 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1686 KVMPPC_DEBUG_WATCH_READ;
1687 break;
1688 default:
1689 cpu_abort(cs, "Unsupported breakpoint type\n");
1690 }
1691 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1692 }
1693 }
1694 }
1695
1696 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1697 {
1698 CPUState *cs = CPU(cpu);
1699 CPUPPCState *env = &cpu->env;
1700 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1701 int handle = 0;
1702 int n;
1703 int flag = 0;
1704
1705 if (cs->singlestep_enabled) {
1706 handle = 1;
1707 } else if (arch_info->status) {
1708 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1709 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1710 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1711 if (n >= 0) {
1712 handle = 1;
1713 }
1714 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1715 KVMPPC_DEBUG_WATCH_WRITE)) {
1716 n = find_hw_watchpoint(arch_info->address, &flag);
1717 if (n >= 0) {
1718 handle = 1;
1719 cs->watchpoint_hit = &hw_watchpoint;
1720 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1721 hw_watchpoint.flags = flag;
1722 }
1723 }
1724 }
1725 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1726 handle = 1;
1727 } else {
1728 /* QEMU is not able to handle this debug exception, so inject
1729 * a program exception into the guest;
1730 * yes, a program exception, NOT a debug exception!
1731 * When QEMU is using the debug resources then the debug exception must
1732 * always be set. To achieve this we set MSR_DE and also set
1733 * MSRP_DEP so the guest cannot change MSR_DE.
1734 * When emulating debug resources for the guest we want the guest
1735 * to control MSR_DE (enable/disable the debug interrupt on demand).
1736 * Supporting both configurations is NOT possible.
1737 * So the result is that we cannot share debug resources
1738 * between QEMU and the guest on the BookE architecture.
1739 * In the current design QEMU gets priority over the guest;
1740 * this means that if QEMU is using the debug resources then the guest
1741 * cannot use them.
1742 * For software breakpoints QEMU uses a privileged instruction,
1743 * so there is no way we can be here because the guest raised a
1744 * debug exception; the only possibility is that the guest executed
1745 * a privileged / illegal instruction, and that is why we are
1746 * injecting a program interrupt.
1747 */
1748
1749 cpu_synchronize_state(cs);
1750 /* env->nip is PC, so increment this by 4 to use
1751 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1752 */
1753 env->nip += 4;
1754 cs->exception_index = POWERPC_EXCP_PROGRAM;
1755 env->error_code = POWERPC_EXCP_INVAL;
1756 ppc_cpu_do_interrupt(cs);
1757 }
1758
1759 return handle;
1760 }
1761
1762 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1763 {
1764 PowerPCCPU *cpu = POWERPC_CPU(cs);
1765 CPUPPCState *env = &cpu->env;
1766 int ret;
1767
1768 qemu_mutex_lock_iothread();
1769
1770 switch (run->exit_reason) {
1771 case KVM_EXIT_DCR:
1772 if (run->dcr.is_write) {
1773 DPRINTF("handle dcr write\n");
1774 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1775 } else {
1776 DPRINTF("handle dcr read\n");
1777 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1778 }
1779 break;
1780 case KVM_EXIT_HLT:
1781 DPRINTF("handle halt\n");
1782 ret = kvmppc_handle_halt(cpu);
1783 break;
1784 #if defined(TARGET_PPC64)
1785 case KVM_EXIT_PAPR_HCALL:
1786 DPRINTF("handle PAPR hypercall\n");
1787 run->papr_hcall.ret = spapr_hypercall(cpu,
1788 run->papr_hcall.nr,
1789 run->papr_hcall.args);
1790 ret = 0;
1791 break;
1792 #endif
1793 case KVM_EXIT_EPR:
1794 DPRINTF("handle epr\n");
1795 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1796 ret = 0;
1797 break;
1798 case KVM_EXIT_WATCHDOG:
1799 DPRINTF("handle watchdog expiry\n");
1800 watchdog_perform_action();
1801 ret = 0;
1802 break;
1803
1804 case KVM_EXIT_DEBUG:
1805 DPRINTF("handle debug exception\n");
1806 if (kvm_handle_debug(cpu, run)) {
1807 ret = EXCP_DEBUG;
1808 break;
1809 }
1810 /* re-enter, this exception was guest-internal */
1811 ret = 0;
1812 break;
1813
1814 default:
1815 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1816 ret = -1;
1817 break;
1818 }
1819
1820 qemu_mutex_unlock_iothread();
1821 return ret;
1822 }
1823
1824 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1825 {
1826 CPUState *cs = CPU(cpu);
1827 uint32_t bits = tsr_bits;
1828 struct kvm_one_reg reg = {
1829 .id = KVM_REG_PPC_OR_TSR,
1830 .addr = (uintptr_t) &bits,
1831 };
1832
1833 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1834 }
1835
1836 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1837 {
1838
1839 CPUState *cs = CPU(cpu);
1840 uint32_t bits = tsr_bits;
1841 struct kvm_one_reg reg = {
1842 .id = KVM_REG_PPC_CLEAR_TSR,
1843 .addr = (uintptr_t) &bits,
1844 };
1845
1846 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1847 }
1848
1849 int kvmppc_set_tcr(PowerPCCPU *cpu)
1850 {
1851 CPUState *cs = CPU(cpu);
1852 CPUPPCState *env = &cpu->env;
1853 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1854
1855 struct kvm_one_reg reg = {
1856 .id = KVM_REG_PPC_TCR,
1857 .addr = (uintptr_t) &tcr,
1858 };
1859
1860 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1861 }
1862
1863 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1864 {
1865 CPUState *cs = CPU(cpu);
1866 int ret;
1867
1868 if (!kvm_enabled()) {
1869 return -1;
1870 }
1871
1872 if (!cap_ppc_watchdog) {
1873 printf("warning: KVM does not support watchdog\n");
1874 return -1;
1875 }
1876
1877 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1878 if (ret < 0) {
1879 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1880 __func__, strerror(-ret));
1881 return ret;
1882 }
1883
1884 return ret;
1885 }
1886
1887 static int read_cpuinfo(const char *field, char *value, int len)
1888 {
1889 FILE *f;
1890 int ret = -1;
1891 int field_len = strlen(field);
1892 char line[512];
1893
1894 f = fopen("/proc/cpuinfo", "r");
1895 if (!f) {
1896 return -1;
1897 }
1898
1899 do {
1900 if (!fgets(line, sizeof(line), f)) {
1901 break;
1902 }
1903 if (!strncmp(line, field, field_len)) {
1904 pstrcpy(value, len, line);
1905 ret = 0;
1906 break;
1907 }
1908 } while (*line);
1909
1910 fclose(f);
1911
1912 return ret;
1913 }
1914
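/* Parse the timebase frequency from /proc/cpuinfo; on POWER hosts the line
 * typically looks like "timebase : 512000000". Falls back to
 * NANOSECONDS_PER_SECOND if the field cannot be found. */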
1915 uint32_t kvmppc_get_tbfreq(void)
1916 {
1917 char line[512];
1918 char *ns;
1919 uint32_t retval = NANOSECONDS_PER_SECOND;
1920
1921 if (read_cpuinfo("timebase", line, sizeof(line))) {
1922 return retval;
1923 }
1924
1925 if (!(ns = strchr(line, ':'))) {
1926 return retval;
1927 }
1928
1929 ns++;
1930
1931 return atoi(ns);
1932 }
1933
1934 bool kvmppc_get_host_serial(char **value)
1935 {
1936 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1937 NULL);
1938 }
1939
1940 bool kvmppc_get_host_model(char **value)
1941 {
1942 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1943 }
1944
1945 /* Try to find a device tree node for a CPU with clock-frequency property */
1946 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1947 {
1948 struct dirent *dirp;
1949 DIR *dp;
1950
1951 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1952 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1953 return -1;
1954 }
1955
1956 buf[0] = '\0';
1957 while ((dirp = readdir(dp)) != NULL) {
1958 FILE *f;
1959 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1960 dirp->d_name);
1961 f = fopen(buf, "r");
1962 if (f) {
1963 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1964 fclose(f);
1965 break;
1966 }
1967 buf[0] = '\0';
1968 }
1969 closedir(dp);
1970 if (buf[0] == '\0') {
1971 printf("Unknown host!\n");
1972 return -1;
1973 }
1974
1975 return 0;
1976 }
1977
1978 static uint64_t kvmppc_read_int_dt(const char *filename)
1979 {
1980 union {
1981 uint32_t v32;
1982 uint64_t v64;
1983 } u;
1984 FILE *f;
1985 int len;
1986
1987 f = fopen(filename, "rb");
1988 if (!f) {
1989 return -1;
1990 }
1991
1992 len = fread(&u, 1, sizeof(u), f);
1993 fclose(f);
1994 switch (len) {
1995 case 4:
1996 /* property is a 32-bit quantity */
1997 return be32_to_cpu(u.v32);
1998 case 8:
1999 return be64_to_cpu(u.v64);
2000 }
2001
2002 return 0;
2003 }
2004
2005 /* Read a CPU node property from the host device tree that's a single
2006 * integer (32-bit or 64-bit). Returns -1 if the CPU node or the
2007 * property can't be found or opened, and 0 if the property format
2008 * isn't understood. */
2009 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
2010 {
2011 char buf[PATH_MAX], *tmp;
2012 uint64_t val;
2013
2014 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
2015 return -1;
2016 }
2017
2018 tmp = g_strdup_printf("%s/%s", buf, propname);
2019 val = kvmppc_read_int_dt(tmp);
2020 g_free(tmp);
2021
2022 return val;
2023 }
2024
2025 uint64_t kvmppc_get_clockfreq(void)
2026 {
2027 return kvmppc_read_int_cpu_dt("clock-frequency");
2028 }
2029
2030 uint32_t kvmppc_get_vmx(void)
2031 {
2032 return kvmppc_read_int_cpu_dt("ibm,vmx");
2033 }
2034
2035 uint32_t kvmppc_get_dfp(void)
2036 {
2037 return kvmppc_read_int_cpu_dt("ibm,dfp");
2038 }
2039
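/* Fetch the paravirt hypercall info from KVM. Note the return convention:
 * 0 on success, 1 if the GET_PVINFO capability is absent or the ioctl
 * fails. */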
2040 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2041 {
2042 PowerPCCPU *cpu = ppc_env_get_cpu(env);
2043 CPUState *cs = CPU(cpu);
2044
2045 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2046 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2047 return 0;
2048 }
2049
2050 return 1;
2051 }
2052
2053 int kvmppc_get_hasidle(CPUPPCState *env)
2054 {
2055 struct kvm_ppc_pvinfo pvinfo;
2056
2057 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2058 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2059 return 1;
2060 }
2061
2062 return 0;
2063 }
2064
2065 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2066 {
2067 uint32_t *hc = (uint32_t*)buf;
2068 struct kvm_ppc_pvinfo pvinfo;
2069
2070 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2071 memcpy(buf, pvinfo.hcall, buf_len);
2072 return 0;
2073 }
2074
2075 /*
2076 * Fall back to a sequence that always fails hypercalls, regardless of endianness:
2077 *
2078 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2079 * li r3, -1
2080 * b .+8 (becomes nop in wrong endian)
2081 * bswap32(li r3, -1)
2082 */
2083
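/* In that order: 0x08000048 is tdi 0,r0,72; 0x3860ffff is li r3,-1;
 * 0x48000008 is b .+8; the last word is the byte-swapped li r3,-1. */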
2084 hc[0] = cpu_to_be32(0x08000048);
2085 hc[1] = cpu_to_be32(0x3860ffff);
2086 hc[2] = cpu_to_be32(0x48000008);
2087 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2088
2089 return 1;
2090 }
2091
2092 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2093 {
2094 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2095 }
2096
2097 void kvmppc_enable_logical_ci_hcalls(void)
2098 {
2099 /*
2100 * FIXME: it would be nice to detect the cases where a device
2101 * in use requires the in-kernel implementation of these
2102 * hcalls but the kernel lacks it, and to produce a warning
2103 * in that situation.
2104 */
2105 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2106 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2107 }
2108
2109 void kvmppc_enable_set_mode_hcall(void)
2110 {
2111 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2112 }
2113
2114 void kvmppc_enable_clear_ref_mod_hcalls(void)
2115 {
2116 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2117 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2118 }
2119
2120 void kvmppc_set_papr(PowerPCCPU *cpu)
2121 {
2122 CPUState *cs = CPU(cpu);
2123 int ret;
2124
2125 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2126 if (ret) {
2127 error_report("This vCPU type or KVM version does not support PAPR");
2128 exit(1);
2129 }
2130
2131 /* Update the capability flag so we sync the right information
2132 * with kvm */
2133 cap_papr = 1;
2134 }
2135
2136 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2137 {
2138 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2139 }
2140
2141 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2142 {
2143 CPUState *cs = CPU(cpu);
2144 int ret;
2145
2146 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2147 if (ret && mpic_proxy) {
2148 error_report("This KVM version does not support EPR");
2149 exit(1);
2150 }
2151 }
2152
2153 int kvmppc_smt_threads(void)
2154 {
2155 return cap_ppc_smt ? cap_ppc_smt : 1;
2156 }
2157
2158 #ifdef TARGET_PPC64
2159 off_t kvmppc_alloc_rma(void **rma)
2160 {
2161 off_t size;
2162 int fd;
2163 struct kvm_allocate_rma ret;
2164
2165 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2166 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2167 * not necessary on this hardware
2168 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2169 *
2170 * FIXME: We should allow the user to force contiguous RMA
2171 * allocation in the cap_ppc_rma==1 case.
2172 */
2173 if (cap_ppc_rma < 2) {
2174 return 0;
2175 }
2176
2177 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2178 if (fd < 0) {
2179 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2180 strerror(errno));
2181 return -1;
2182 }
2183
2184 size = MIN(ret.rma_size, 256ul << 20);
2185
2186 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2187 if (*rma == MAP_FAILED) {
2188 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2189 return -1;
2190 }
2191
2192 return size;
2193 }
2194
2195 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2196 {
2197 struct kvm_ppc_smmu_info info;
2198 long rampagesize, best_page_shift;
2199 int i;
2200
2201 if (cap_ppc_rma >= 2) {
2202 return current_size;
2203 }
2204
2205 /* Find the largest hardware supported page size that's less than
2206 * or equal to the (logical) backing page size of guest RAM */
2207 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2208 rampagesize = getrampagesize();
2209 best_page_shift = 0;
2210
2211 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2212 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2213
2214 if (!sps->page_shift) {
2215 continue;
2216 }
2217
2218 if ((sps->page_shift > best_page_shift)
2219 && ((1UL << sps->page_shift) <= rampagesize)) {
2220 best_page_shift = sps->page_shift;
2221 }
2222 }
2223
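/* The shift by 7 comes from the HPTE group size: a group is 128 bytes
 * (8 slots of HASH_PTE_SIZE_64), so a 2^hash_shift byte hash table has
 * 2^(hash_shift - 7) groups, and the RMA is capped at one backing page
 * per group. */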
2224 return MIN(current_size,
2225 1ULL << (best_page_shift + hash_shift - 7));
2226 }
2227 #endif
2228
2229 bool kvmppc_spapr_use_multitce(void)
2230 {
2231 return cap_spapr_multitce;
2232 }
2233
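/* Create an in-kernel TCE table of window_size / SPAPR_TCE_PAGE_SIZE
 * 64-bit entries and mmap() it into QEMU. On success the table is
 * returned and the backing fd is stored in *pfd; on failure (or when the
 * required capabilities are missing) NULL is returned with *pfd = -1 so
 * the caller can fall back to a userspace-managed table. */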
2234 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2235 bool need_vfio)
2236 {
2237 struct kvm_create_spapr_tce args = {
2238 .liobn = liobn,
2239 .window_size = window_size,
2240 };
2241 long len;
2242 int fd;
2243 void *table;
2244
2245 /* Must set fd to -1 so we don't try to munmap when called for
2246 * destroying the table, which the upper layers -will- do
2247 */
2248 *pfd = -1;
2249 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2250 return NULL;
2251 }
2252
2253 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2254 if (fd < 0) {
2255 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2256 liobn);
2257 return NULL;
2258 }
2259
2260 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2261 /* FIXME: round this up to page size */
2262
2263 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2264 if (table == MAP_FAILED) {
2265 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2266 liobn);
2267 close(fd);
2268 return NULL;
2269 }
2270
2271 *pfd = fd;
2272 return table;
2273 }
2274
2275 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2276 {
2277 long len;
2278
2279 if (fd < 0) {
2280 return -1;
2281 }
2282
2283 len = nb_table * sizeof(uint64_t);
2284 if ((munmap(table, len) < 0) ||
2285 (close(fd) < 0)) {
2286 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2287 strerror(errno));
2288 /* Leak the table */
2289 }
2290
2291 return 0;
2292 }
2293
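/* Return convention: negative on error, 0 when QEMU must allocate the
 * hash table itself (full emulation or PR KVM), otherwise the log2 size
 * (shift) of the hash table the kernel has allocated. */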
2294 int kvmppc_reset_htab(int shift_hint)
2295 {
2296 uint32_t shift = shift_hint;
2297
2298 if (!kvm_enabled()) {
2299 /* Full emulation, tell caller to allocate htab itself */
2300 return 0;
2301 }
2302 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2303 int ret;
2304 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2305 if (ret == -ENOTTY) {
2306 /* At least some versions of PR KVM advertise the
2307 * capability, but don't implement the ioctl(). Oops.
2308 * Return 0 so that we allocate the htab in qemu, as is
2309 * correct for PR. */
2310 return 0;
2311 } else if (ret < 0) {
2312 return ret;
2313 }
2314 return shift;
2315 }
2316
2317 /* We have a kernel that predates the htab reset calls. For PR
2318 * KVM we need to allocate the htab ourselves; an HV KVM of this
2319 * era will already have allocated a fixed 16MB hash table. */
2320 if (kvmppc_is_pr(kvm_state)) {
2321 /* PR - tell caller to allocate htab */
2322 return 0;
2323 } else {
2324 /* HV - assume 16MB kernel allocated htab */
2325 return 24;
2326 }
2327 }
2328
2329 static inline uint32_t mfpvr(void)
2330 {
2331 uint32_t pvr;
2332
2333 asm ("mfpvr %0"
2334 : "=r"(pvr));
2335 return pvr;
2336 }
2337
2338 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2339 {
2340 if (on) {
2341 *word |= flags;
2342 } else {
2343 *word &= ~flags;
2344 }
2345 }
2346
2347 static void kvmppc_host_cpu_initfn(Object *obj)
2348 {
2349 assert(kvm_enabled());
2350 }
2351
2352 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2353 {
2354 DeviceClass *dc = DEVICE_CLASS(oc);
2355 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2356 uint32_t vmx = kvmppc_get_vmx();
2357 uint32_t dfp = kvmppc_get_dfp();
2358 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2359 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2360
2361 /* Now fix up the class with information we can query from the host */
2362 pcc->pvr = mfpvr();
2363
2364 if (vmx != -1) {
2365 /* Only override when we know what the host supports */
2366 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2367 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2368 }
2369 if (dfp != -1) {
2370 /* Only override when we know what the host supports */
2371 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2372 }
2373
2374 if (dcache_size != -1) {
2375 pcc->l1_dcache_size = dcache_size;
2376 }
2377
2378 if (icache_size != -1) {
2379 pcc->l1_icache_size = icache_size;
2380 }
2381
2382 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2383 dc->cannot_destroy_with_object_finalize_yet = true;
2384 }
2385
2386 bool kvmppc_has_cap_epr(void)
2387 {
2388 return cap_epr;
2389 }
2390
2391 bool kvmppc_has_cap_htab_fd(void)
2392 {
2393 return cap_htab_fd;
2394 }
2395
2396 bool kvmppc_has_cap_fixup_hcalls(void)
2397 {
2398 return cap_fixup_hcalls;
2399 }
2400
2401 bool kvmppc_has_cap_htm(void)
2402 {
2403 return cap_htm;
2404 }
2405
2406 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2407 {
2408 ObjectClass *oc = OBJECT_CLASS(pcc);
2409
2410 while (oc && !object_class_is_abstract(oc)) {
2411 oc = object_class_get_parent(oc);
2412 }
2413 assert(oc);
2414
2415 return POWERPC_CPU_CLASS(oc);
2416 }
2417
2418 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2419 {
2420 uint32_t host_pvr = mfpvr();
2421 PowerPCCPUClass *pvr_pcc;
2422
2423 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2424 if (pvr_pcc == NULL) {
2425 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2426 }
2427
2428 return pvr_pcc;
2429 }
2430
2431 static int kvm_ppc_register_host_cpu_type(void)
2432 {
2433 TypeInfo type_info = {
2434 .name = TYPE_HOST_POWERPC_CPU,
2435 .instance_init = kvmppc_host_cpu_initfn,
2436 .class_init = kvmppc_host_cpu_class_init,
2437 };
2438 PowerPCCPUClass *pvr_pcc;
2439 DeviceClass *dc;
2440 int i;
2441
2442 pvr_pcc = kvm_ppc_get_host_cpu_class();
2443 if (pvr_pcc == NULL) {
2444 return -1;
2445 }
2446 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2447 type_register(&type_info);
2448
2449 #if defined(TARGET_PPC64)
2450 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2451 type_info.parent = TYPE_SPAPR_CPU_CORE;
2452 type_info.instance_size = sizeof(sPAPRCPUCore);
2453 type_info.instance_init = NULL;
2454 type_info.class_init = spapr_cpu_core_class_init;
2455 type_info.class_data = (void *) "host";
2456 type_register(&type_info);
2457 g_free((void *)type_info.name);
2458 #endif
2459
2460 /*
2461 * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2462 * we want "POWER8" to be a "family" alias that points to the current
2463 * host CPU type, too)
2464 */
2465 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2466 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2467 if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2468 ObjectClass *oc = OBJECT_CLASS(pvr_pcc);
2469 char *suffix;
2470
2471 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2472 suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
2473 if (suffix) {
2474 *suffix = 0;
2475 }
2476 ppc_cpu_aliases[i].oc = oc;
2477 break;
2478 }
2479 }
2480
2481 return 0;
2482 }
2483
2484 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2485 {
2486 struct kvm_rtas_token_args args = {
2487 .token = token,
2488 };
2489
2490 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2491 return -ENOENT;
2492 }
2493
2494 strncpy(args.name, function, sizeof(args.name));
2495
2496 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2497 }
2498
2499 int kvmppc_get_htab_fd(bool write)
2500 {
2501 struct kvm_get_htab_fd s = {
2502 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2503 .start_index = 0,
2504 };
2505
2506 if (!cap_htab_fd) {
2507 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2508 return -1;
2509 }
2510
2511 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2512 }
2513
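/* Stream format used for migration below, one record per chunk read from
 * the HTAB fd: a be32 index, a be16 n_valid, a be16 n_invalid, followed
 * by n_valid HPTEs of HASH_PTE_SIZE_64 bytes each; kvmppc_load_htab_chunk()
 * consumes the same layout on the destination. A rough caller sketch
 * (not the actual migration loop, error handling omitted) might look like:
 *
 *     int fd = kvmppc_get_htab_fd(false);
 *     int done;
 *     do {
 *         done = kvmppc_save_htab(f, fd, bufsize, max_ns);
 *     } while (done == 0);            (1 means the fd reached EOF)
 *     close(fd);
 */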
2514 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2515 {
2516 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2517 uint8_t buf[bufsize];
2518 ssize_t rc;
2519
2520 do {
2521 rc = read(fd, buf, bufsize);
2522 if (rc < 0) {
2523 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2524 strerror(errno));
2525 return rc;
2526 } else if (rc) {
2527 uint8_t *buffer = buf;
2528 ssize_t n = rc;
2529 while (n) {
2530 struct kvm_get_htab_header *head =
2531 (struct kvm_get_htab_header *) buffer;
2532 size_t chunksize = sizeof(*head) +
2533 HASH_PTE_SIZE_64 * head->n_valid;
2534
2535 qemu_put_be32(f, head->index);
2536 qemu_put_be16(f, head->n_valid);
2537 qemu_put_be16(f, head->n_invalid);
2538 qemu_put_buffer(f, (void *)(head + 1),
2539 HASH_PTE_SIZE_64 * head->n_valid);
2540
2541 buffer += chunksize;
2542 n -= chunksize;
2543 }
2544 }
2545 } while ((rc != 0)
2546 && ((max_ns < 0)
2547 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2548
2549 return (rc == 0) ? 1 : 0;
2550 }
2551
2552 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2553 uint16_t n_valid, uint16_t n_invalid)
2554 {
2555 struct kvm_get_htab_header *buf;
2556 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2557 ssize_t rc;
2558
2559 buf = alloca(chunksize);
2560 buf->index = index;
2561 buf->n_valid = n_valid;
2562 buf->n_invalid = n_invalid;
2563
2564 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2565
2566 rc = write(fd, buf, chunksize);
2567 if (rc < 0) {
2568 fprintf(stderr, "Error writing KVM hash table: %s\n",
2569 strerror(errno));
2570 return rc;
2571 }
2572 if (rc != chunksize) {
2573 /* We should never get a short write on a single chunk */
2574 fprintf(stderr, "Short write, restoring KVM hash table\n");
2575 return -1;
2576 }
2577 return 0;
2578 }
2579
2580 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2581 {
2582 return true;
2583 }
2584
2585 void kvm_arch_init_irq_routing(KVMState *s)
2586 {
2587 }
2588
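/* Read n HPTEs starting at index ptex through the HTAB fd. Runs reported
 * as invalid by the chunk headers are zero-filled in the destination, so
 * the caller always receives n fully initialised entries. */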
2589 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2590 {
2591 struct kvm_get_htab_fd ghf = {
2592 .flags = 0,
2593 .start_index = ptex,
2594 };
2595 int fd, rc;
2596 int i;
2597
2598 fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2599 if (fd < 0) {
2600 hw_error("kvmppc_read_hptes: Unable to open HPT fd");
2601 }
2602
2603 i = 0;
2604 while (i < n) {
2605 struct kvm_get_htab_header *hdr;
2606 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2607 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2608
2609 rc = read(fd, buf, sizeof(buf));
2610 if (rc < 0) {
2611 hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2612 }
2613
2614 hdr = (struct kvm_get_htab_header *)buf;
2615 while ((i < n) && ((char *)hdr < (buf + rc))) {
2616 int invalid = hdr->n_invalid;
2617
2618 if (hdr->index != (ptex + i)) {
2619 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2620 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2621 }
2622
2623 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2624 i += hdr->n_valid;
2625
2626 if ((n - i) < invalid) {
2627 invalid = n - i;
2628 }
2629 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2630 i += hdr->n_invalid;
2631
2632 hdr = (struct kvm_get_htab_header *)
2633 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2634 }
2635 }
2636
2637 close(fd);
2638 }
2639
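/* Update a single HPTE through the HTAB fd: one header with n_valid = 1
 * and n_invalid = 0, followed by the two PTE doublewords. HPTEs are kept
 * big-endian in the hash table, hence the cpu_to_be64() on both words. */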
2640 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2641 {
2642 int fd, rc;
2643 struct kvm_get_htab_fd ghf;
2644 struct {
2645 struct kvm_get_htab_header hdr;
2646 uint64_t pte0;
2647 uint64_t pte1;
2648 } buf;
2649
2650 ghf.flags = 0;
2651 ghf.start_index = 0; /* Ignored */
2652 fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2653 if (fd < 0) {
2654 hw_error("kvmppc_write_hpte: Unable to open HPT fd");
2655 }
2656
2657 buf.hdr.n_valid = 1;
2658 buf.hdr.n_invalid = 0;
2659 buf.hdr.index = ptex;
2660 buf.pte0 = cpu_to_be64(pte0);
2661 buf.pte1 = cpu_to_be64(pte1);
2662
2663 rc = write(fd, &buf, sizeof(buf));
2664 if (rc != sizeof(buf)) {
2665 hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2666 }
2667 close(fd);
2668 }
2669
2670 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2671 uint64_t address, uint32_t data, PCIDevice *dev)
2672 {
2673 return 0;
2674 }
2675
2676 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2677 int vector, PCIDevice *dev)
2678 {
2679 return 0;
2680 }
2681
2682 int kvm_arch_release_virq_post(int virq)
2683 {
2684 return 0;
2685 }
2686
2687 int kvm_arch_msi_data_to_gsi(uint32_t data)
2688 {
2689 return data & 0xffff;
2690 }
2691
2692 int kvmppc_enable_hwrng(void)
2693 {
2694 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2695 return -1;
2696 }
2697
2698 return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2699 }