2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "hw/ppc/ppc.h"
39 #include "sysemu/watchdog.h"
45 #define DPRINTF(fmt, ...) \
46 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
48 #define DPRINTF(fmt, ...) \
52 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
54 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
58 static int cap_interrupt_unset
= false;
59 static int cap_interrupt_level
= false;
60 static int cap_segstate
;
61 static int cap_booke_sregs
;
62 static int cap_ppc_smt
;
63 static int cap_ppc_rma
;
64 static int cap_spapr_tce
;
66 static int cap_one_reg
;
68 static int cap_ppc_watchdog
;
70 static int cap_htab_fd
;
72 /* XXX We have a race condition where we actually have a level triggered
73 * interrupt, but the infrastructure can't expose that yet, so the guest
74 * takes but ignores it, goes to sleep and never gets notified that there's
75 * still an interrupt pending.
77 * As a quick workaround, let's just wake up again 20 ms after we injected
78 * an interrupt. That way we can assure that we're always reinjecting
79 * interrupts in case the guest swallowed them.
81 static QEMUTimer
*idle_timer
;
83 static void kvm_kick_cpu(void *opaque
)
85 PowerPCCPU
*cpu
= opaque
;
87 qemu_cpu_kick(CPU(cpu
));
90 static int kvm_ppc_register_host_cpu_type(void);
92 int kvm_arch_init(KVMState
*s
)
94 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
95 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
96 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
97 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
98 cap_ppc_smt
= kvm_check_extension(s
, KVM_CAP_PPC_SMT
);
99 cap_ppc_rma
= kvm_check_extension(s
, KVM_CAP_PPC_RMA
);
100 cap_spapr_tce
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE
);
101 cap_one_reg
= kvm_check_extension(s
, KVM_CAP_ONE_REG
);
102 cap_hior
= kvm_check_extension(s
, KVM_CAP_PPC_HIOR
);
103 cap_epr
= kvm_check_extension(s
, KVM_CAP_PPC_EPR
);
104 cap_ppc_watchdog
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_WATCHDOG
);
105 /* Note: we don't set cap_papr here, because this capability is
106 * only activated after this by kvmppc_set_papr() */
107 cap_htab_fd
= kvm_check_extension(s
, KVM_CAP_PPC_HTAB_FD
);
109 if (!cap_interrupt_level
) {
110 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
111 "VM to stall at times!\n");
114 kvm_ppc_register_host_cpu_type();
119 static int kvm_arch_sync_sregs(PowerPCCPU
*cpu
)
121 CPUPPCState
*cenv
= &cpu
->env
;
122 CPUState
*cs
= CPU(cpu
);
123 struct kvm_sregs sregs
;
126 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
127 /* What we're really trying to say is "if we're on BookE, we use
128 the native PVR for now". This is the only sane way to check
129 it though, so we potentially confuse users that they can run
130 BookE guests on BookS. Let's hope nobody dares enough :) */
134 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
139 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
144 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
145 return kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
148 /* Set up a shared TLB array with KVM */
149 static int kvm_booke206_tlb_init(PowerPCCPU
*cpu
)
151 CPUPPCState
*env
= &cpu
->env
;
152 CPUState
*cs
= CPU(cpu
);
153 struct kvm_book3e_206_tlb_params params
= {};
154 struct kvm_config_tlb cfg
= {};
155 unsigned int entries
= 0;
158 if (!kvm_enabled() ||
159 !kvm_check_extension(cs
->kvm_state
, KVM_CAP_SW_TLB
)) {
163 assert(ARRAY_SIZE(params
.tlb_sizes
) == BOOKE206_MAX_TLBN
);
165 for (i
= 0; i
< BOOKE206_MAX_TLBN
; i
++) {
166 params
.tlb_sizes
[i
] = booke206_tlb_size(env
, i
);
167 params
.tlb_ways
[i
] = booke206_tlb_ways(env
, i
);
168 entries
+= params
.tlb_sizes
[i
];
171 assert(entries
== env
->nb_tlb
);
172 assert(sizeof(struct kvm_book3e_206_tlb_entry
) == sizeof(ppcmas_tlb_t
));
174 env
->tlb_dirty
= true;
176 cfg
.array
= (uintptr_t)env
->tlb
.tlbm
;
177 cfg
.array_len
= sizeof(ppcmas_tlb_t
) * entries
;
178 cfg
.params
= (uintptr_t)¶ms
;
179 cfg
.mmu_type
= KVM_MMU_FSL_BOOKE_NOHV
;
181 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_SW_TLB
, 0, (uintptr_t)&cfg
);
183 fprintf(stderr
, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
184 __func__
, strerror(-ret
));
188 env
->kvm_sw_tlb
= true;
193 #if defined(TARGET_PPC64)
194 static void kvm_get_fallback_smmu_info(PowerPCCPU
*cpu
,
195 struct kvm_ppc_smmu_info
*info
)
197 CPUPPCState
*env
= &cpu
->env
;
198 CPUState
*cs
= CPU(cpu
);
200 memset(info
, 0, sizeof(*info
));
202 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
203 * need to "guess" what the supported page sizes are.
205 * For that to work we make a few assumptions:
207 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
208 * KVM which only supports 4K and 16M pages, but supports them
209 * regardless of the backing store characteristics. We also don't
210 * support 1T segments.
212 * This is safe as if HV KVM ever supports that capability or PR
213 * KVM grows support for more page/segment sizes, those versions
214 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
215 * will not hit this fallback
217 * - Else we are running HV KVM. This means we only support page
218 * sizes that fit in the backing store. Additionally we only
219 * advertise 64K pages if the processor is ARCH 2.06 and we assume
220 * P7 encodings for the SLB and hash table. Here too, we assume
221 * support for any newer processor will mean a kernel that
222 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
225 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
230 /* Standard 4k base page size segment */
231 info
->sps
[0].page_shift
= 12;
232 info
->sps
[0].slb_enc
= 0;
233 info
->sps
[0].enc
[0].page_shift
= 12;
234 info
->sps
[0].enc
[0].pte_enc
= 0;
236 /* Standard 16M large page size segment */
237 info
->sps
[1].page_shift
= 24;
238 info
->sps
[1].slb_enc
= SLB_VSID_L
;
239 info
->sps
[1].enc
[0].page_shift
= 24;
240 info
->sps
[1].enc
[0].pte_enc
= 0;
244 /* HV KVM has backing store size restrictions */
245 info
->flags
= KVM_PPC_PAGE_SIZES_REAL
;
247 if (env
->mmu_model
& POWERPC_MMU_1TSEG
) {
248 info
->flags
|= KVM_PPC_1T_SEGMENTS
;
251 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
257 /* Standard 4k base page size segment */
258 info
->sps
[i
].page_shift
= 12;
259 info
->sps
[i
].slb_enc
= 0;
260 info
->sps
[i
].enc
[0].page_shift
= 12;
261 info
->sps
[i
].enc
[0].pte_enc
= 0;
264 /* 64K on MMU 2.06 */
265 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
266 info
->sps
[i
].page_shift
= 16;
267 info
->sps
[i
].slb_enc
= 0x110;
268 info
->sps
[i
].enc
[0].page_shift
= 16;
269 info
->sps
[i
].enc
[0].pte_enc
= 1;
273 /* Standard 16M large page size segment */
274 info
->sps
[i
].page_shift
= 24;
275 info
->sps
[i
].slb_enc
= SLB_VSID_L
;
276 info
->sps
[i
].enc
[0].page_shift
= 24;
277 info
->sps
[i
].enc
[0].pte_enc
= 0;
281 static void kvm_get_smmu_info(PowerPCCPU
*cpu
, struct kvm_ppc_smmu_info
*info
)
283 CPUState
*cs
= CPU(cpu
);
286 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
287 ret
= kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
293 kvm_get_fallback_smmu_info(cpu
, info
);
296 static long getrampagesize(void)
302 /* guest RAM is backed by normal anonymous pages */
303 return getpagesize();
307 ret
= statfs(mem_path
, &fs
);
308 } while (ret
!= 0 && errno
== EINTR
);
311 fprintf(stderr
, "Couldn't statfs() memory path: %s\n",
316 #define HUGETLBFS_MAGIC 0x958458f6
318 if (fs
.f_type
!= HUGETLBFS_MAGIC
) {
319 /* Explicit mempath, but it's ordinary pages */
320 return getpagesize();
323 /* It's hugepage, return the huge page size */
327 static bool kvm_valid_page_size(uint32_t flags
, long rampgsize
, uint32_t shift
)
329 if (!(flags
& KVM_PPC_PAGE_SIZES_REAL
)) {
333 return (1ul << shift
) <= rampgsize
;
336 static void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
338 static struct kvm_ppc_smmu_info smmu_info
;
339 static bool has_smmu_info
;
340 CPUPPCState
*env
= &cpu
->env
;
344 /* We only handle page sizes for 64-bit server guests for now */
345 if (!(env
->mmu_model
& POWERPC_MMU_64
)) {
349 /* Collect MMU info from kernel if not already */
350 if (!has_smmu_info
) {
351 kvm_get_smmu_info(cpu
, &smmu_info
);
352 has_smmu_info
= true;
355 rampagesize
= getrampagesize();
357 /* Convert to QEMU form */
358 memset(&env
->sps
, 0, sizeof(env
->sps
));
361 * XXX This loop should be an entry wide AND of the capabilities that
362 * the selected CPU has with the capabilities that KVM supports.
364 for (ik
= iq
= 0; ik
< KVM_PPC_PAGE_SIZES_MAX_SZ
; ik
++) {
365 struct ppc_one_seg_page_size
*qsps
= &env
->sps
.sps
[iq
];
366 struct kvm_ppc_one_seg_page_size
*ksps
= &smmu_info
.sps
[ik
];
368 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
372 qsps
->page_shift
= ksps
->page_shift
;
373 qsps
->slb_enc
= ksps
->slb_enc
;
374 for (jk
= jq
= 0; jk
< KVM_PPC_PAGE_SIZES_MAX_SZ
; jk
++) {
375 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
376 ksps
->enc
[jk
].page_shift
)) {
379 qsps
->enc
[jq
].page_shift
= ksps
->enc
[jk
].page_shift
;
380 qsps
->enc
[jq
].pte_enc
= ksps
->enc
[jk
].pte_enc
;
381 if (++jq
>= PPC_PAGE_SIZES_MAX_SZ
) {
385 if (++iq
>= PPC_PAGE_SIZES_MAX_SZ
) {
389 env
->slb_nr
= smmu_info
.slb_size
;
390 if (!(smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
)) {
391 env
->mmu_model
&= ~POWERPC_MMU_1TSEG
;
394 #else /* defined (TARGET_PPC64) */
396 static inline void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
400 #endif /* !defined (TARGET_PPC64) */
402 unsigned long kvm_arch_vcpu_id(CPUState
*cpu
)
404 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu
));
407 int kvm_arch_init_vcpu(CPUState
*cs
)
409 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
410 CPUPPCState
*cenv
= &cpu
->env
;
413 /* Gather server mmu info from KVM and update the CPU state */
414 kvm_fixup_page_sizes(cpu
);
416 /* Synchronize sregs with kvm */
417 ret
= kvm_arch_sync_sregs(cpu
);
422 idle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
, kvm_kick_cpu
, cpu
);
424 /* Some targets support access to KVM's guest TLB. */
425 switch (cenv
->mmu_model
) {
426 case POWERPC_MMU_BOOKE206
:
427 ret
= kvm_booke206_tlb_init(cpu
);
436 static void kvm_sw_tlb_put(PowerPCCPU
*cpu
)
438 CPUPPCState
*env
= &cpu
->env
;
439 CPUState
*cs
= CPU(cpu
);
440 struct kvm_dirty_tlb dirty_tlb
;
441 unsigned char *bitmap
;
444 if (!env
->kvm_sw_tlb
) {
448 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
449 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
451 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
452 dirty_tlb
.num_dirty
= env
->nb_tlb
;
454 ret
= kvm_vcpu_ioctl(cs
, KVM_DIRTY_TLB
, &dirty_tlb
);
456 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
457 __func__
, strerror(-ret
));
463 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
465 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
466 CPUPPCState
*env
= &cpu
->env
;
471 struct kvm_one_reg reg
= {
473 .addr
= (uintptr_t) &val
,
477 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
479 trace_kvm_failed_spr_get(spr
, strerror(errno
));
481 switch (id
& KVM_REG_SIZE_MASK
) {
482 case KVM_REG_SIZE_U32
:
483 env
->spr
[spr
] = val
.u32
;
486 case KVM_REG_SIZE_U64
:
487 env
->spr
[spr
] = val
.u64
;
491 /* Don't handle this size yet */
497 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
499 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
500 CPUPPCState
*env
= &cpu
->env
;
505 struct kvm_one_reg reg
= {
507 .addr
= (uintptr_t) &val
,
511 switch (id
& KVM_REG_SIZE_MASK
) {
512 case KVM_REG_SIZE_U32
:
513 val
.u32
= env
->spr
[spr
];
516 case KVM_REG_SIZE_U64
:
517 val
.u64
= env
->spr
[spr
];
521 /* Don't handle this size yet */
525 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
527 trace_kvm_failed_spr_set(spr
, strerror(errno
));
531 static int kvm_put_fp(CPUState
*cs
)
533 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
534 CPUPPCState
*env
= &cpu
->env
;
535 struct kvm_one_reg reg
;
539 if (env
->insns_flags
& PPC_FLOAT
) {
540 uint64_t fpscr
= env
->fpscr
;
541 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
543 reg
.id
= KVM_REG_PPC_FPSCR
;
544 reg
.addr
= (uintptr_t)&fpscr
;
545 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
547 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
551 for (i
= 0; i
< 32; i
++) {
554 vsr
[0] = float64_val(env
->fpr
[i
]);
555 vsr
[1] = env
->vsr
[i
];
556 reg
.addr
= (uintptr_t) &vsr
;
557 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
559 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
561 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
568 if (env
->insns_flags
& PPC_ALTIVEC
) {
569 reg
.id
= KVM_REG_PPC_VSCR
;
570 reg
.addr
= (uintptr_t)&env
->vscr
;
571 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
573 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno
));
577 for (i
= 0; i
< 32; i
++) {
578 reg
.id
= KVM_REG_PPC_VR(i
);
579 reg
.addr
= (uintptr_t)&env
->avr
[i
];
580 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
582 DPRINTF("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
591 static int kvm_get_fp(CPUState
*cs
)
593 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
594 CPUPPCState
*env
= &cpu
->env
;
595 struct kvm_one_reg reg
;
599 if (env
->insns_flags
& PPC_FLOAT
) {
601 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
603 reg
.id
= KVM_REG_PPC_FPSCR
;
604 reg
.addr
= (uintptr_t)&fpscr
;
605 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
607 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
613 for (i
= 0; i
< 32; i
++) {
616 reg
.addr
= (uintptr_t) &vsr
;
617 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
619 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
621 DPRINTF("Unable to get %s%d from KVM: %s\n",
622 vsx
? "VSR" : "FPR", i
, strerror(errno
));
625 env
->fpr
[i
] = vsr
[0];
627 env
->vsr
[i
] = vsr
[1];
633 if (env
->insns_flags
& PPC_ALTIVEC
) {
634 reg
.id
= KVM_REG_PPC_VSCR
;
635 reg
.addr
= (uintptr_t)&env
->vscr
;
636 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
638 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno
));
642 for (i
= 0; i
< 32; i
++) {
643 reg
.id
= KVM_REG_PPC_VR(i
);
644 reg
.addr
= (uintptr_t)&env
->avr
[i
];
645 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
647 DPRINTF("Unable to get VR%d from KVM: %s\n",
657 #if defined(TARGET_PPC64)
658 static int kvm_get_vpa(CPUState
*cs
)
660 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
661 CPUPPCState
*env
= &cpu
->env
;
662 struct kvm_one_reg reg
;
665 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
666 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
667 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
669 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno
));
673 assert((uintptr_t)&env
->slb_shadow_size
674 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
675 reg
.id
= KVM_REG_PPC_VPA_SLB
;
676 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
677 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
679 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
684 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
685 reg
.id
= KVM_REG_PPC_VPA_DTL
;
686 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
687 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
689 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
697 static int kvm_put_vpa(CPUState
*cs
)
699 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
700 CPUPPCState
*env
= &cpu
->env
;
701 struct kvm_one_reg reg
;
704 /* SLB shadow or DTL can't be registered unless a master VPA is
705 * registered. That means when restoring state, if a VPA *is*
706 * registered, we need to set that up first. If not, we need to
707 * deregister the others before deregistering the master VPA */
708 assert(env
->vpa_addr
|| !(env
->slb_shadow_addr
|| env
->dtl_addr
));
711 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
712 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
713 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
715 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
720 assert((uintptr_t)&env
->slb_shadow_size
721 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
722 reg
.id
= KVM_REG_PPC_VPA_SLB
;
723 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
724 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
726 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
730 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
731 reg
.id
= KVM_REG_PPC_VPA_DTL
;
732 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
733 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
735 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
740 if (!env
->vpa_addr
) {
741 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
742 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
743 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
745 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
752 #endif /* TARGET_PPC64 */
754 int kvm_arch_put_registers(CPUState
*cs
, int level
)
756 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
757 CPUPPCState
*env
= &cpu
->env
;
758 struct kvm_regs regs
;
762 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
769 regs
.xer
= cpu_read_xer(env
);
773 regs
.srr0
= env
->spr
[SPR_SRR0
];
774 regs
.srr1
= env
->spr
[SPR_SRR1
];
776 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
777 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
778 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
779 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
780 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
781 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
782 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
783 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
785 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
787 for (i
= 0;i
< 32; i
++)
788 regs
.gpr
[i
] = env
->gpr
[i
];
791 for (i
= 0; i
< 8; i
++) {
792 regs
.cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
795 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
801 if (env
->tlb_dirty
) {
803 env
->tlb_dirty
= false;
806 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
807 struct kvm_sregs sregs
;
809 sregs
.pvr
= env
->spr
[SPR_PVR
];
811 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
815 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
816 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
817 if (env
->slb
[i
].esid
& SLB_ESID_V
) {
818 sregs
.u
.s
.ppc64
.slb
[i
].slbe
|= i
;
820 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
825 for (i
= 0; i
< 16; i
++) {
826 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
830 for (i
= 0; i
< 8; i
++) {
831 /* Beware. We have to swap upper and lower bits here */
832 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
834 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
838 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
844 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
845 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
851 /* We deliberately ignore errors here, for kernels which have
852 * the ONE_REG calls, but don't support the specific
853 * registers, there's a reasonable chance things will still
854 * work, at least until we try to migrate. */
855 for (i
= 0; i
< 1024; i
++) {
856 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
859 kvm_put_one_spr(cs
, id
, i
);
865 if (kvm_put_vpa(cs
) < 0) {
866 DPRINTF("Warning: Unable to set VPA information to KVM\n");
870 kvm_set_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
871 #endif /* TARGET_PPC64 */
877 int kvm_arch_get_registers(CPUState
*cs
)
879 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
880 CPUPPCState
*env
= &cpu
->env
;
881 struct kvm_regs regs
;
882 struct kvm_sregs sregs
;
886 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
891 for (i
= 7; i
>= 0; i
--) {
892 env
->crf
[i
] = cr
& 15;
898 cpu_write_xer(env
, regs
.xer
);
902 env
->spr
[SPR_SRR0
] = regs
.srr0
;
903 env
->spr
[SPR_SRR1
] = regs
.srr1
;
905 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
906 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
907 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
908 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
909 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
910 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
911 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
912 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
914 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
916 for (i
= 0;i
< 32; i
++)
917 env
->gpr
[i
] = regs
.gpr
[i
];
921 if (cap_booke_sregs
) {
922 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
927 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
928 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
929 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
930 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
931 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
932 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
933 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
934 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
935 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
936 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
937 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
938 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
941 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
942 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
943 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
944 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
945 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
946 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
949 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
950 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
953 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
954 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
957 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
958 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
959 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
960 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
961 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
962 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
963 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
964 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
965 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
966 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
967 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
968 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
969 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
970 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
971 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
972 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
973 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
975 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
976 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
977 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
978 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
981 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
982 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
985 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
986 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
987 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
991 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
992 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
993 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
994 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
995 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
996 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
997 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
998 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
999 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
1000 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
1001 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
1004 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
1005 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1008 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1009 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1010 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1013 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1014 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1015 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1016 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1018 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1019 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1020 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1026 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
1031 if (!env
->external_htab
) {
1032 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1038 * The packed SLB array we get from KVM_GET_SREGS only contains
1039 * information about valid entries. So we flush our internal
1040 * copy to get rid of stale ones, then put all valid SLB entries
1043 memset(env
->slb
, 0, sizeof(env
->slb
));
1044 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
1045 target_ulong rb
= sregs
.u
.s
.ppc64
.slb
[i
].slbe
;
1046 target_ulong rs
= sregs
.u
.s
.ppc64
.slb
[i
].slbv
;
1048 * Only restore valid entries
1050 if (rb
& SLB_ESID_V
) {
1051 ppc_store_slb(env
, rb
, rs
);
1057 for (i
= 0; i
< 16; i
++) {
1058 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1062 for (i
= 0; i
< 8; i
++) {
1063 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1064 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1065 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1066 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1071 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1077 /* We deliberately ignore errors here, for kernels which have
1078 * the ONE_REG calls, but don't support the specific
1079 * registers, there's a reasonable chance things will still
1080 * work, at least until we try to migrate. */
1081 for (i
= 0; i
< 1024; i
++) {
1082 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1085 kvm_get_one_spr(cs
, id
, i
);
1091 if (kvm_get_vpa(cs
) < 0) {
1092 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1096 kvm_get_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1103 int kvmppc_set_interrupt(PowerPCCPU
*cpu
, int irq
, int level
)
1105 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
1107 if (irq
!= PPC_INTERRUPT_EXT
) {
1111 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
1115 kvm_vcpu_ioctl(CPU(cpu
), KVM_INTERRUPT
, &virq
);
1120 #if defined(TARGET_PPCEMB)
1121 #define PPC_INPUT_INT PPC40x_INPUT_INT
1122 #elif defined(TARGET_PPC64)
1123 #define PPC_INPUT_INT PPC970_INPUT_INT
1125 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1128 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1130 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1131 CPUPPCState
*env
= &cpu
->env
;
1135 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1136 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1137 if (!cap_interrupt_level
&&
1138 run
->ready_for_interrupt_injection
&&
1139 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1140 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1142 /* For now KVM disregards the 'irq' argument. However, in the
1143 * future KVM could cache it in-kernel to avoid a heavyweight exit
1144 * when reading the UIC.
1146 irq
= KVM_INTERRUPT_SET
;
1148 DPRINTF("injected interrupt %d\n", irq
);
1149 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1151 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1154 /* Always wake up soon in case the interrupt was level based */
1155 timer_mod(idle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
1156 (get_ticks_per_sec() / 50));
1159 /* We don't know if there are more interrupts pending after this. However,
1160 * the guest will return to userspace in the course of handling this one
1161 * anyways, so we will get a chance to deliver the rest. */
1164 void kvm_arch_post_run(CPUState
*cpu
, struct kvm_run
*run
)
1168 int kvm_arch_process_async_events(CPUState
*cs
)
1173 static int kvmppc_handle_halt(PowerPCCPU
*cpu
)
1175 CPUState
*cs
= CPU(cpu
);
1176 CPUPPCState
*env
= &cpu
->env
;
1178 if (!(cs
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
1180 cs
->exception_index
= EXCP_HLT
;
1186 /* map dcr access to existing qemu dcr emulation */
1187 static int kvmppc_handle_dcr_read(CPUPPCState
*env
, uint32_t dcrn
, uint32_t *data
)
1189 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
1190 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
1195 static int kvmppc_handle_dcr_write(CPUPPCState
*env
, uint32_t dcrn
, uint32_t data
)
1197 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
1198 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
1203 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1205 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1206 CPUPPCState
*env
= &cpu
->env
;
1209 switch (run
->exit_reason
) {
1211 if (run
->dcr
.is_write
) {
1212 DPRINTF("handle dcr write\n");
1213 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1215 DPRINTF("handle dcr read\n");
1216 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
1220 DPRINTF("handle halt\n");
1221 ret
= kvmppc_handle_halt(cpu
);
1223 #if defined(TARGET_PPC64)
1224 case KVM_EXIT_PAPR_HCALL
:
1225 DPRINTF("handle PAPR hypercall\n");
1226 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1228 run
->papr_hcall
.args
);
1233 DPRINTF("handle epr\n");
1234 run
->epr
.epr
= ldl_phys(cs
->as
, env
->mpic_iack
);
1237 case KVM_EXIT_WATCHDOG
:
1238 DPRINTF("handle watchdog expiry\n");
1239 watchdog_perform_action();
1244 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1252 int kvmppc_or_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1254 CPUState
*cs
= CPU(cpu
);
1255 uint32_t bits
= tsr_bits
;
1256 struct kvm_one_reg reg
= {
1257 .id
= KVM_REG_PPC_OR_TSR
,
1258 .addr
= (uintptr_t) &bits
,
1261 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1264 int kvmppc_clear_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1267 CPUState
*cs
= CPU(cpu
);
1268 uint32_t bits
= tsr_bits
;
1269 struct kvm_one_reg reg
= {
1270 .id
= KVM_REG_PPC_CLEAR_TSR
,
1271 .addr
= (uintptr_t) &bits
,
1274 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1277 int kvmppc_set_tcr(PowerPCCPU
*cpu
)
1279 CPUState
*cs
= CPU(cpu
);
1280 CPUPPCState
*env
= &cpu
->env
;
1281 uint32_t tcr
= env
->spr
[SPR_BOOKE_TCR
];
1283 struct kvm_one_reg reg
= {
1284 .id
= KVM_REG_PPC_TCR
,
1285 .addr
= (uintptr_t) &tcr
,
1288 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1291 int kvmppc_booke_watchdog_enable(PowerPCCPU
*cpu
)
1293 CPUState
*cs
= CPU(cpu
);
1296 if (!kvm_enabled()) {
1300 if (!cap_ppc_watchdog
) {
1301 printf("warning: KVM does not support watchdog");
1305 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_BOOKE_WATCHDOG
, 0);
1307 fprintf(stderr
, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1308 __func__
, strerror(-ret
));
1315 static int read_cpuinfo(const char *field
, char *value
, int len
)
1319 int field_len
= strlen(field
);
1322 f
= fopen("/proc/cpuinfo", "r");
1328 if(!fgets(line
, sizeof(line
), f
)) {
1331 if (!strncmp(line
, field
, field_len
)) {
1332 pstrcpy(value
, len
, line
);
1343 uint32_t kvmppc_get_tbfreq(void)
1347 uint32_t retval
= get_ticks_per_sec();
1349 if (read_cpuinfo("timebase", line
, sizeof(line
))) {
1353 if (!(ns
= strchr(line
, ':'))) {
1363 /* Try to find a device tree node for a CPU with clock-frequency property */
1364 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1366 struct dirent
*dirp
;
1369 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1370 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1375 while ((dirp
= readdir(dp
)) != NULL
) {
1377 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1379 f
= fopen(buf
, "r");
1381 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
1388 if (buf
[0] == '\0') {
1389 printf("Unknown host!\n");
1396 /* Read a CPU node property from the host device tree that's a single
1397 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1398 * (can't find or open the property, or doesn't understand the
1400 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
1410 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
1414 strncat(buf
, "/", sizeof(buf
) - strlen(buf
));
1415 strncat(buf
, propname
, sizeof(buf
) - strlen(buf
));
1417 f
= fopen(buf
, "rb");
1422 len
= fread(&u
, 1, sizeof(u
), f
);
1426 /* property is a 32-bit quantity */
1427 return be32_to_cpu(u
.v32
);
1429 return be64_to_cpu(u
.v64
);
/* Host CPU clock frequency from the device tree (0 on failure). */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
/* Host "ibm,vmx" (AltiVec/VSX level) property from the device tree. */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}
/* Host "ibm,dfp" (decimal floating point) property from the device tree. */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
1450 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
1452 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
1453 CPUState
*cs
= CPU(cpu
);
1455 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
1456 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
1463 int kvmppc_get_hasidle(CPUPPCState
*env
)
1465 struct kvm_ppc_pvinfo pvinfo
;
1467 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
1468 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
1475 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
1477 uint32_t *hc
= (uint32_t*)buf
;
1478 struct kvm_ppc_pvinfo pvinfo
;
1480 if (!kvmppc_get_pvinfo(env
, &pvinfo
)) {
1481 memcpy(buf
, pvinfo
.hcall
, buf_len
);
1486 * Fallback to always fail hypercalls:
1502 void kvmppc_set_papr(PowerPCCPU
*cpu
)
1504 CPUState
*cs
= CPU(cpu
);
1507 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_PAPR
, 0);
1509 cpu_abort(cs
, "This KVM version does not support PAPR\n");
1512 /* Update the capability flag so we sync the right information
1517 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
1519 CPUState
*cs
= CPU(cpu
);
1522 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_EPR
, 0, mpic_proxy
);
1523 if (ret
&& mpic_proxy
) {
1524 cpu_abort(cs
, "This KVM version does not support EPR\n");
1528 int kvmppc_smt_threads(void)
1530 return cap_ppc_smt
? cap_ppc_smt
: 1;
1534 off_t
kvmppc_alloc_rma(const char *name
, MemoryRegion
*sysmem
)
1539 struct kvm_allocate_rma ret
;
1540 MemoryRegion
*rma_region
;
1542 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1543 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1544 * not necessary on this hardware
1545 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1547 * FIXME: We should allow the user to force contiguous RMA
1548 * allocation in the cap_ppc_rma==1 case.
1550 if (cap_ppc_rma
< 2) {
1554 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
1556 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1561 size
= MIN(ret
.rma_size
, 256ul << 20);
1563 rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1564 if (rma
== MAP_FAILED
) {
1565 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
1569 rma_region
= g_new(MemoryRegion
, 1);
1570 memory_region_init_ram_ptr(rma_region
, NULL
, name
, size
, rma
);
1571 vmstate_register_ram_global(rma_region
);
1572 memory_region_add_subregion(sysmem
, 0, rma_region
);
1577 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
1579 struct kvm_ppc_smmu_info info
;
1580 long rampagesize
, best_page_shift
;
1583 if (cap_ppc_rma
>= 2) {
1584 return current_size
;
1587 /* Find the largest hardware supported page size that's less than
1588 * or equal to the (logical) backing page size of guest RAM */
1589 kvm_get_smmu_info(POWERPC_CPU(first_cpu
), &info
);
1590 rampagesize
= getrampagesize();
1591 best_page_shift
= 0;
1593 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
1594 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
1596 if (!sps
->page_shift
) {
1600 if ((sps
->page_shift
> best_page_shift
)
1601 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
1602 best_page_shift
= sps
->page_shift
;
1606 return MIN(current_size
,
1607 1ULL << (best_page_shift
+ hash_shift
- 7));
1611 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
)
1613 struct kvm_create_spapr_tce args
= {
1615 .window_size
= window_size
,
1621 /* Must set fd to -1 so we don't try to munmap when called for
1622 * destroying the table, which the upper layers -will- do
1625 if (!cap_spapr_tce
) {
1629 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
1631 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
1636 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(uint64_t);
1637 /* FIXME: round this up to page size */
1639 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1640 if (table
== MAP_FAILED
) {
1641 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
1651 int kvmppc_remove_spapr_tce(void *table
, int fd
, uint32_t window_size
)
1659 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
)*sizeof(uint64_t);
1660 if ((munmap(table
, len
) < 0) ||
1662 fprintf(stderr
, "KVM: Unexpected error removing TCE table: %s",
1664 /* Leak the table */
1670 int kvmppc_reset_htab(int shift_hint
)
1672 uint32_t shift
= shift_hint
;
1674 if (!kvm_enabled()) {
1675 /* Full emulation, tell caller to allocate htab itself */
1678 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
1680 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
1681 if (ret
== -ENOTTY
) {
1682 /* At least some versions of PR KVM advertise the
1683 * capability, but don't implement the ioctl(). Oops.
1684 * Return 0 so that we allocate the htab in qemu, as is
1685 * correct for PR. */
1687 } else if (ret
< 0) {
1693 /* We have a kernel that predates the htab reset calls. For PR
1694 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1695 * this era, it has allocated a 16MB fixed size hash table
1696 * already. Kernels of this era have the GET_PVINFO capability
1697 * only on PR, so we use this hack to determine the right
1699 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
1700 /* PR - tell caller to allocate htab */
1703 /* HV - assume 16MB kernel allocated htab */
1708 static inline uint32_t mfpvr(void)
/* Set (@on true) or clear (@on false) the given flag bits in *@word.
 * NOTE(review): body not visible in this extract; reconstructed from
 * the callers below, which pass instruction-flag masks. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}
1726 static void kvmppc_host_cpu_initfn(Object
*obj
)
1728 assert(kvm_enabled());
1731 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
1733 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
1734 uint32_t vmx
= kvmppc_get_vmx();
1735 uint32_t dfp
= kvmppc_get_dfp();
1736 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
1737 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
1739 /* Now fix up the class with information we can query from the host */
1743 /* Only override when we know what the host supports */
1744 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
1745 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
1748 /* Only override when we know what the host supports */
1749 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
1752 if (dcache_size
!= -1) {
1753 pcc
->l1_dcache_size
= dcache_size
;
1756 if (icache_size
!= -1) {
1757 pcc
->l1_icache_size
= icache_size
;
1761 bool kvmppc_has_cap_epr(void)
1766 bool kvmppc_has_cap_htab_fd(void)
1771 static PowerPCCPUClass
*ppc_cpu_get_family_class(PowerPCCPUClass
*pcc
)
1773 ObjectClass
*oc
= OBJECT_CLASS(pcc
);
1775 while (oc
&& !object_class_is_abstract(oc
)) {
1776 oc
= object_class_get_parent(oc
);
1780 return POWERPC_CPU_CLASS(oc
);
1783 static int kvm_ppc_register_host_cpu_type(void)
1785 TypeInfo type_info
= {
1786 .name
= TYPE_HOST_POWERPC_CPU
,
1787 .instance_init
= kvmppc_host_cpu_initfn
,
1788 .class_init
= kvmppc_host_cpu_class_init
,
1790 uint32_t host_pvr
= mfpvr();
1791 PowerPCCPUClass
*pvr_pcc
;
1794 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
1795 if (pvr_pcc
== NULL
) {
1796 pvr_pcc
= ppc_cpu_class_by_pvr_mask(host_pvr
);
1798 if (pvr_pcc
== NULL
) {
1801 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
1802 type_register(&type_info
);
1804 /* Register generic family CPU class for a family */
1805 pvr_pcc
= ppc_cpu_get_family_class(pvr_pcc
);
1806 dc
= DEVICE_CLASS(pvr_pcc
);
1807 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
1808 type_info
.name
= g_strdup_printf("%s-"TYPE_POWERPC_CPU
, dc
->desc
);
1809 type_register(&type_info
);
1814 int kvmppc_define_rtas_kernel_token(uint32_t token
, const char *function
)
1816 struct kvm_rtas_token_args args
= {
1820 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_RTAS
)) {
1824 strncpy(args
.name
, function
, sizeof(args
.name
));
1826 return kvm_vm_ioctl(kvm_state
, KVM_PPC_RTAS_DEFINE_TOKEN
, &args
);
1829 int kvmppc_get_htab_fd(bool write
)
1831 struct kvm_get_htab_fd s
= {
1832 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
1837 fprintf(stderr
, "KVM version doesn't support saving the hash table\n");
1841 return kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
1844 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
1846 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
1847 uint8_t buf
[bufsize
];
1851 rc
= read(fd
, buf
, bufsize
);
1853 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
1857 /* Kernel already retuns data in BE format for the file */
1858 qemu_put_buffer(f
, buf
, rc
);
1862 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
1864 return (rc
== 0) ? 1 : 0;
1867 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
1868 uint16_t n_valid
, uint16_t n_invalid
)
1870 struct kvm_get_htab_header
*buf
;
1871 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
1874 buf
= alloca(chunksize
);
1875 /* This is KVM on ppc, so this is all big-endian */
1877 buf
->n_valid
= n_valid
;
1878 buf
->n_invalid
= n_invalid
;
1880 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
1882 rc
= write(fd
, buf
, chunksize
);
1884 fprintf(stderr
, "Error writing KVM hash table: %s\n",
1888 if (rc
!= chunksize
) {
1889 /* We should never get a short write on a single chunk */
1890 fprintf(stderr
, "Short write, restoring KVM hash table\n");
/* The following kvm_arch_* hooks are required by the generic KVM
 * layer; their bodies were not captured in this extract, so only the
 * signatures are visible.  NOTE(review): on ppc these are typically
 * trivial stubs (debug/breakpoint support unimplemented) — confirm
 * the return values against the repository before relying on them. */
1896 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
/* SIGBUS delivered while a specific vcpu was running. */
1901 int kvm_arch_on_sigbus_vcpu(CPUState
*cpu
, int code
, void *addr
)
/* SIGBUS delivered outside vcpu context. */
1906 int kvm_arch_on_sigbus(int code
, void *addr
)
/* Arch hook for in-kernel irqchip routing setup. */
1911 void kvm_arch_init_irq_routing(KVMState
*s
)
/* Software breakpoint insertion (guest debug). */
1915 int kvm_arch_insert_sw_breakpoint(CPUState
*cpu
, struct kvm_sw_breakpoint
*bp
)
/* Software breakpoint removal (guest debug). */
1920 int kvm_arch_remove_sw_breakpoint(CPUState
*cpu
, struct kvm_sw_breakpoint
*bp
)
/* Hardware breakpoint insertion (guest debug). */
1925 int kvm_arch_insert_hw_breakpoint(target_ulong addr
, target_ulong len
, int type
)
/* Hardware breakpoint removal (guest debug). */
1930 int kvm_arch_remove_hw_breakpoint(target_ulong addr
, target_ulong len
, int type
)
/* Drop all hardware breakpoints. */
1935 void kvm_arch_remove_all_hw_breakpoints(void)
/* Sync guest-debug state into the kvm_guest_debug control block. */
1939 void kvm_arch_update_guest_debug(CPUState
*cpu
, struct kvm_guest_debug
*dbg
)
/* Buffer used to read one hash PTE group through the KVM htab fd:
 * the kernel header followed by the PTE payload.  (Parts of this
 * definition were not captured in this extract.) */
1943 struct kvm_get_htab_buf
{
1944 struct kvm_get_htab_header header
;
1946 * We require one extra byte for read
1948 target_ulong hpte
[(HPTES_PER_GROUP
* 2) + 1];
1951 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU
*cpu
, target_ulong pte_index
)
1954 struct kvm_get_htab_fd ghf
;
1955 struct kvm_get_htab_buf
*hpte_buf
;
1958 ghf
.start_index
= pte_index
;
1959 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
1964 hpte_buf
= g_malloc0(sizeof(*hpte_buf
));
1966 * Read the hpte group
1968 if (read(htab_fd
, hpte_buf
, sizeof(*hpte_buf
)) < 0) {
1973 return (uint64_t)(uintptr_t) hpte_buf
->hpte
;
1982 void kvmppc_hash64_free_pteg(uint64_t token
)
1984 struct kvm_get_htab_buf
*htab_buf
;
1986 htab_buf
= container_of((void *)(uintptr_t) token
, struct kvm_get_htab_buf
,
1992 void kvmppc_hash64_write_pte(CPUPPCState
*env
, target_ulong pte_index
,
1993 target_ulong pte0
, target_ulong pte1
)
1996 struct kvm_get_htab_fd ghf
;
1997 struct kvm_get_htab_buf hpte_buf
;
2000 ghf
.start_index
= 0; /* Ignored */
2001 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2006 hpte_buf
.header
.n_valid
= 1;
2007 hpte_buf
.header
.n_invalid
= 0;
2008 hpte_buf
.header
.index
= pte_index
;
2009 hpte_buf
.hpte
[0] = pte0
;
2010 hpte_buf
.hpte
[1] = pte1
;
2012 * Write the hpte entry.
2013 * CAUTION: write() has the warn_unused_result attribute. Hence we
2014 * need to check the return value, even though we do nothing.
2016 if (write(htab_fd
, &hpte_buf
, sizeof(hpte_buf
)) < 0) {