arch/s390/kvm/kvm-s390.c
1 /*
2 * hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008, 2009
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
15 */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/random.h>
27 #include <linux/slab.h>
28 #include <linux/timer.h>
29 #include <linux/vmalloc.h>
30 #include <linux/bitmap.h>
31 #include <asm/asm-offsets.h>
32 #include <asm/lowcore.h>
33 #include <asm/etr.h>
34 #include <asm/pgtable.h>
35 #include <asm/gmap.h>
36 #include <asm/nmi.h>
37 #include <asm/switch_to.h>
38 #include <asm/isc.h>
39 #include <asm/sclp.h>
40 #include <asm/cpacf.h>
42 #include "kvm-s390.h"
43 #include "gaccess.h"
44
45 #define KMSG_COMPONENT "kvm-s390"
46 #undef pr_fmt
47 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
48
49 #define CREATE_TRACE_POINTS
50 #include "trace.h"
51 #include "trace-s390.h"
52
53 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
54 #define LOCAL_IRQS 32
55 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
56 (KVM_MAX_VCPUS + LOCAL_IRQS))
57
58 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
59
60 struct kvm_stats_debugfs_item debugfs_entries[] = {
61 { "userspace_handled", VCPU_STAT(exit_userspace) },
62 { "exit_null", VCPU_STAT(exit_null) },
63 { "exit_validity", VCPU_STAT(exit_validity) },
64 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
65 { "exit_external_request", VCPU_STAT(exit_external_request) },
66 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
67 { "exit_instruction", VCPU_STAT(exit_instruction) },
68 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
69 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
70 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
71 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
72 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
73 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
74 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
75 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
76 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
77 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
78 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
79 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
80 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
81 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
82 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
83 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
84 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
85 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
86 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
87 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
88 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
89 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
90 { "instruction_spx", VCPU_STAT(instruction_spx) },
91 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
92 { "instruction_stap", VCPU_STAT(instruction_stap) },
93 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
94 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
95 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
96 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
97 { "instruction_essa", VCPU_STAT(instruction_essa) },
98 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
99 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
100 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
101 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
102 { "instruction_sie", VCPU_STAT(instruction_sie) },
103 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
104 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
105 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
106 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
107 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
108 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
109 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
110 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
111 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
112 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
113 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
114 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
115 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
116 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
117 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
118 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
119 { "diagnose_10", VCPU_STAT(diagnose_10) },
120 { "diagnose_44", VCPU_STAT(diagnose_44) },
121 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
122 { "diagnose_258", VCPU_STAT(diagnose_258) },
123 { "diagnose_308", VCPU_STAT(diagnose_308) },
124 { "diagnose_500", VCPU_STAT(diagnose_500) },
125 { NULL }
126 };
127
128 /* upper facilities limit for kvm */
129 unsigned long kvm_s390_fac_list_mask[16] = {
130 0xffe6000000000000UL,
131 0x005e000000000000UL,
132 };
133
134 unsigned long kvm_s390_fac_list_mask_size(void)
135 {
136 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
137 return ARRAY_SIZE(kvm_s390_fac_list_mask);
138 }
139
140 /* available cpu features supported by kvm */
141 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
142 /* available subfunctions indicated via query / "test bit" */
143 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
144
145 static struct gmap_notifier gmap_notifier;
146 static struct gmap_notifier vsie_gmap_notifier;
147 debug_info_t *kvm_s390_dbf;
148
149 /* Section: not file related */
150 int kvm_arch_hardware_enable(void)
151 {
152 /* every s390 is virtualization enabled ;-) */
153 return 0;
154 }
155
156 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
157 unsigned long end);
158
159 /*
160 * This callback is executed during stop_machine(). All CPUs are therefore
161 * temporarily stopped. In order not to change guest behavior, we have to
162 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
163 * so that no CPU is stopped in the middle of an epoch adjustment.
164 */
165 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
166 void *v)
167 {
168 struct kvm *kvm;
169 struct kvm_vcpu *vcpu;
170 int i;
171 unsigned long long *delta = v;
172
173 list_for_each_entry(kvm, &vm_list, vm_list) {
174 kvm->arch.epoch -= *delta;
175 kvm_for_each_vcpu(i, vcpu, kvm) {
176 vcpu->arch.sie_block->epoch -= *delta;
177 if (vcpu->arch.cputm_enabled)
178 vcpu->arch.cputm_start += *delta;
179 if (vcpu->arch.vsie_block)
180 vcpu->arch.vsie_block->epoch -= *delta;
181 }
182 }
183 return NOTIFY_OK;
184 }
185
186 static struct notifier_block kvm_clock_notifier = {
187 .notifier_call = kvm_clock_sync,
188 };
189
190 int kvm_arch_hardware_setup(void)
191 {
192 gmap_notifier.notifier_call = kvm_gmap_notifier;
193 gmap_register_pte_notifier(&gmap_notifier);
194 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
195 gmap_register_pte_notifier(&vsie_gmap_notifier);
196 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
197 &kvm_clock_notifier);
198 return 0;
199 }
200
201 void kvm_arch_hardware_unsetup(void)
202 {
203 gmap_unregister_pte_notifier(&gmap_notifier);
204 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
205 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
206 &kvm_clock_notifier);
207 }
208
209 static void allow_cpu_feat(unsigned long nr)
210 {
211 set_bit_inv(nr, kvm_s390_available_cpu_feat);
212 }
213
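/*
* Check whether PLO (PERFORM LOCKED OPERATION) subfunction @nr is
* available: the function code is queried with the test-bit indication
* (0x100) set, so no parameters are needed; cc 0 means "installed".
*/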
214 static inline int plo_test_bit(unsigned char nr)
215 {
216 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
217 int cc = 3; /* subfunction not available */
218
219 asm volatile(
220 /* Parameter registers are ignored for "test bit" */
221 " plo 0,0,0,0(0)\n"
222 " ipm %0\n"
223 " srl %0,28\n"
224 : "=d" (cc)
225 : "d" (r0)
226 : "cc");
227 return cc == 0;
228 }
229
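/*
* Probe the host for everything that can be offered to guests via the
* CPU model: PLO subfunctions, PTFF (TOD-clock steering), the CPACF
* query masks for the MSA extensions, and the SIE-related CPU features
* reported by SCLP and the facility list.
*/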
230 static void kvm_s390_cpu_feat_init(void)
231 {
232 int i;
233
234 for (i = 0; i < 256; ++i) {
235 if (plo_test_bit(i))
236 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
237 }
238
239 if (test_facility(28)) /* TOD-clock steering */
240 etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF);
241
242 if (test_facility(17)) { /* MSA */
243 __cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
244 __cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
245 __cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
246 __cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
247 __cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
248 }
249 if (test_facility(76)) /* MSA3 */
250 __cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
251 if (test_facility(77)) { /* MSA4 */
252 __cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
253 __cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
254 __cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
255 __cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
256 }
257 if (test_facility(57)) /* MSA5 */
258 __cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
259
260 if (MACHINE_HAS_ESOP)
261 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
262 /*
263 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
264 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
265 */
266 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
267 !test_facility(3))
268 return;
269 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
270 if (sclp.has_64bscao)
271 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
272 if (sclp.has_siif)
273 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
274 if (sclp.has_gpere)
275 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
276 if (sclp.has_gsls)
277 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
278 if (sclp.has_ib)
279 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
280 if (sclp.has_cei)
281 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
282 if (sclp.has_ibs)
283 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
284 }
285
286 int kvm_arch_init(void *opaque)
287 {
288 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
289 if (!kvm_s390_dbf)
290 return -ENOMEM;
291
292 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
293 debug_unregister(kvm_s390_dbf);
294 return -ENOMEM;
295 }
296
297 kvm_s390_cpu_feat_init();
298
299 /* Register floating interrupt controller interface. */
300 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
301 }
302
303 void kvm_arch_exit(void)
304 {
305 debug_unregister(kvm_s390_dbf);
306 }
307
308 /* Section: device related */
309 long kvm_arch_dev_ioctl(struct file *filp,
310 unsigned int ioctl, unsigned long arg)
311 {
312 if (ioctl == KVM_S390_ENABLE_SIE)
313 return s390_enable_sie();
314 return -EINVAL;
315 }
316
317 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
318 {
319 int r;
320
321 switch (ext) {
322 case KVM_CAP_S390_PSW:
323 case KVM_CAP_S390_GMAP:
324 case KVM_CAP_SYNC_MMU:
325 #ifdef CONFIG_KVM_S390_UCONTROL
326 case KVM_CAP_S390_UCONTROL:
327 #endif
328 case KVM_CAP_ASYNC_PF:
329 case KVM_CAP_SYNC_REGS:
330 case KVM_CAP_ONE_REG:
331 case KVM_CAP_ENABLE_CAP:
332 case KVM_CAP_S390_CSS_SUPPORT:
333 case KVM_CAP_IOEVENTFD:
334 case KVM_CAP_DEVICE_CTRL:
335 case KVM_CAP_ENABLE_CAP_VM:
336 case KVM_CAP_S390_IRQCHIP:
337 case KVM_CAP_VM_ATTRIBUTES:
338 case KVM_CAP_MP_STATE:
339 case KVM_CAP_S390_INJECT_IRQ:
340 case KVM_CAP_S390_USER_SIGP:
341 case KVM_CAP_S390_USER_STSI:
342 case KVM_CAP_S390_SKEYS:
343 case KVM_CAP_S390_IRQ_STATE:
344 r = 1;
345 break;
346 case KVM_CAP_S390_MEM_OP:
347 r = MEM_OP_MAX_SIZE;
348 break;
349 case KVM_CAP_NR_VCPUS:
350 case KVM_CAP_MAX_VCPUS:
351 r = KVM_S390_BSCA_CPU_SLOTS;
352 if (sclp.has_esca && sclp.has_64bscao)
353 r = KVM_S390_ESCA_CPU_SLOTS;
354 break;
355 case KVM_CAP_NR_MEMSLOTS:
356 r = KVM_USER_MEM_SLOTS;
357 break;
358 case KVM_CAP_S390_COW:
359 r = MACHINE_HAS_ESOP;
360 break;
361 case KVM_CAP_S390_VECTOR_REGISTERS:
362 r = MACHINE_HAS_VX;
363 break;
364 case KVM_CAP_S390_RI:
365 r = test_facility(64);
366 break;
367 default:
368 r = 0;
369 }
370 return r;
371 }
372
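/*
* Walk all pages of the memslot and transfer the dirty state from the
* host page tables (gmap) into the memslot's dirty bitmap.
*/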
373 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
374 struct kvm_memory_slot *memslot)
375 {
376 gfn_t cur_gfn, last_gfn;
377 unsigned long address;
378 struct gmap *gmap = kvm->arch.gmap;
379
380 /* Loop over all guest pages */
381 last_gfn = memslot->base_gfn + memslot->npages;
382 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
383 address = gfn_to_hva_memslot(memslot, cur_gfn);
384
385 if (test_and_clear_guest_dirty(gmap->mm, address))
386 mark_page_dirty(kvm, cur_gfn);
387 if (fatal_signal_pending(current))
388 return;
389 cond_resched();
390 }
391 }
392
393 /* Section: vm related */
394 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
395
396 /*
397 * Get (and clear) the dirty memory log for a memory slot.
398 */
399 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
400 struct kvm_dirty_log *log)
401 {
402 int r;
403 unsigned long n;
404 struct kvm_memslots *slots;
405 struct kvm_memory_slot *memslot;
406 int is_dirty = 0;
407
408 mutex_lock(&kvm->slots_lock);
409
410 r = -EINVAL;
411 if (log->slot >= KVM_USER_MEM_SLOTS)
412 goto out;
413
414 slots = kvm_memslots(kvm);
415 memslot = id_to_memslot(slots, log->slot);
416 r = -ENOENT;
417 if (!memslot->dirty_bitmap)
418 goto out;
419
420 kvm_s390_sync_dirty_log(kvm, memslot);
421 r = kvm_get_dirty_log(kvm, log, &is_dirty);
422 if (r)
423 goto out;
424
425 /* Clear the dirty log */
426 if (is_dirty) {
427 n = kvm_dirty_bitmap_bytes(memslot);
428 memset(memslot->dirty_bitmap, 0, n);
429 }
430 r = 0;
431 out:
432 mutex_unlock(&kvm->slots_lock);
433 return r;
434 }
435
436 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
437 {
438 int r;
439
440 if (cap->flags)
441 return -EINVAL;
442
443 switch (cap->cap) {
444 case KVM_CAP_S390_IRQCHIP:
445 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
446 kvm->arch.use_irqchip = 1;
447 r = 0;
448 break;
449 case KVM_CAP_S390_USER_SIGP:
450 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
451 kvm->arch.user_sigp = 1;
452 r = 0;
453 break;
454 case KVM_CAP_S390_VECTOR_REGISTERS:
455 mutex_lock(&kvm->lock);
456 if (kvm->created_vcpus) {
457 r = -EBUSY;
458 } else if (MACHINE_HAS_VX) {
459 set_kvm_facility(kvm->arch.model.fac_mask, 129);
460 set_kvm_facility(kvm->arch.model.fac_list, 129);
461 r = 0;
462 } else
463 r = -EINVAL;
464 mutex_unlock(&kvm->lock);
465 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
466 r ? "(not available)" : "(success)");
467 break;
468 case KVM_CAP_S390_RI:
469 r = -EINVAL;
470 mutex_lock(&kvm->lock);
471 if (kvm->created_vcpus) {
472 r = -EBUSY;
473 } else if (test_facility(64)) {
474 set_kvm_facility(kvm->arch.model.fac_mask, 64);
475 set_kvm_facility(kvm->arch.model.fac_list, 64);
476 r = 0;
477 }
478 mutex_unlock(&kvm->lock);
479 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
480 r ? "(not available)" : "(success)");
481 break;
482 case KVM_CAP_S390_USER_STSI:
483 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
484 kvm->arch.user_stsi = 1;
485 r = 0;
486 break;
487 default:
488 r = -EINVAL;
489 break;
490 }
491 return r;
492 }
493
494 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
495 {
496 int ret;
497
498 switch (attr->attr) {
499 case KVM_S390_VM_MEM_LIMIT_SIZE:
500 ret = 0;
501 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
502 kvm->arch.mem_limit);
503 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
504 ret = -EFAULT;
505 break;
506 default:
507 ret = -ENXIO;
508 break;
509 }
510 return ret;
511 }
512
513 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
514 {
515 int ret;
516 unsigned int idx;
517 switch (attr->attr) {
518 case KVM_S390_VM_MEM_ENABLE_CMMA:
519 ret = -ENXIO;
520 if (!sclp.has_cmma)
521 break;
522
523 ret = -EBUSY;
524 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
525 mutex_lock(&kvm->lock);
526 if (!kvm->created_vcpus) {
527 kvm->arch.use_cmma = 1;
528 ret = 0;
529 }
530 mutex_unlock(&kvm->lock);
531 break;
532 case KVM_S390_VM_MEM_CLR_CMMA:
533 ret = -ENXIO;
534 if (!sclp.has_cmma)
535 break;
536 ret = -EINVAL;
537 if (!kvm->arch.use_cmma)
538 break;
539
540 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
541 mutex_lock(&kvm->lock);
542 idx = srcu_read_lock(&kvm->srcu);
543 s390_reset_cmma(kvm->arch.gmap->mm);
544 srcu_read_unlock(&kvm->srcu, idx);
545 mutex_unlock(&kvm->lock);
546 ret = 0;
547 break;
548 case KVM_S390_VM_MEM_LIMIT_SIZE: {
549 unsigned long new_limit;
550
551 if (kvm_is_ucontrol(kvm))
552 return -EINVAL;
553
554 if (get_user(new_limit, (u64 __user *)attr->addr))
555 return -EFAULT;
556
557 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
558 new_limit > kvm->arch.mem_limit)
559 return -E2BIG;
560
561 if (!new_limit)
562 return -EINVAL;
563
564 /* gmap_create takes last usable address */
565 if (new_limit != KVM_S390_NO_MEM_LIMIT)
566 new_limit -= 1;
567
568 ret = -EBUSY;
569 mutex_lock(&kvm->lock);
570 if (!kvm->created_vcpus) {
571 /* gmap_create will round the limit up */
572 struct gmap *new = gmap_create(current->mm, new_limit);
573
574 if (!new) {
575 ret = -ENOMEM;
576 } else {
577 gmap_remove(kvm->arch.gmap);
578 new->private = kvm;
579 kvm->arch.gmap = new;
580 ret = 0;
581 }
582 }
583 mutex_unlock(&kvm->lock);
584 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
585 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
586 (void *) kvm->arch.gmap->asce);
587 break;
588 }
589 default:
590 ret = -ENXIO;
591 break;
592 }
593 return ret;
594 }
595
596 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
597
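/*
* Enable or disable AES/DEA key wrapping for the VM by (re)generating or
* clearing the wrapping-key masks in the CRYCB, then propagate the new
* setup to all vcpus and kick them out of SIE so it takes effect.
*/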
598 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
599 {
600 struct kvm_vcpu *vcpu;
601 int i;
602
603 if (!test_kvm_facility(kvm, 76))
604 return -EINVAL;
605
606 mutex_lock(&kvm->lock);
607 switch (attr->attr) {
608 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
609 get_random_bytes(
610 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
611 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
612 kvm->arch.crypto.aes_kw = 1;
613 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
614 break;
615 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
616 get_random_bytes(
617 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
618 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
619 kvm->arch.crypto.dea_kw = 1;
620 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
621 break;
622 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
623 kvm->arch.crypto.aes_kw = 0;
624 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
625 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
626 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
627 break;
628 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
629 kvm->arch.crypto.dea_kw = 0;
630 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
631 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
632 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
633 break;
634 default:
635 mutex_unlock(&kvm->lock);
636 return -ENXIO;
637 }
638
639 kvm_for_each_vcpu(i, vcpu, kvm) {
640 kvm_s390_vcpu_crypto_setup(vcpu);
641 exit_sie(vcpu);
642 }
643 mutex_unlock(&kvm->lock);
644 return 0;
645 }
646
647 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
648 {
649 u8 gtod_high;
650
651 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
652 sizeof(gtod_high)))
653 return -EFAULT;
654
655 if (gtod_high != 0)
656 return -EINVAL;
657 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
658
659 return 0;
660 }
661
662 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
663 {
664 u64 gtod;
665
666 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
667 return -EFAULT;
668
669 kvm_s390_set_tod_clock(kvm, gtod);
670 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
671 return 0;
672 }
673
674 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
675 {
676 int ret;
677
678 if (attr->flags)
679 return -EINVAL;
680
681 switch (attr->attr) {
682 case KVM_S390_VM_TOD_HIGH:
683 ret = kvm_s390_set_tod_high(kvm, attr);
684 break;
685 case KVM_S390_VM_TOD_LOW:
686 ret = kvm_s390_set_tod_low(kvm, attr);
687 break;
688 default:
689 ret = -ENXIO;
690 break;
691 }
692 return ret;
693 }
694
695 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
696 {
697 u8 gtod_high = 0;
698
699 if (copy_to_user((void __user *)attr->addr, &gtod_high,
700 sizeof(gtod_high)))
701 return -EFAULT;
702 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
703
704 return 0;
705 }
706
707 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
708 {
709 u64 gtod;
710
711 gtod = kvm_s390_get_tod_clock_fast(kvm);
712 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
713 return -EFAULT;
714 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
715
716 return 0;
717 }
718
719 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
720 {
721 int ret;
722
723 if (attr->flags)
724 return -EINVAL;
725
726 switch (attr->attr) {
727 case KVM_S390_VM_TOD_HIGH:
728 ret = kvm_s390_get_tod_high(kvm, attr);
729 break;
730 case KVM_S390_VM_TOD_LOW:
731 ret = kvm_s390_get_tod_low(kvm, attr);
732 break;
733 default:
734 ret = -ENXIO;
735 break;
736 }
737 return ret;
738 }
739
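/*
* Set the guest-visible processor model (cpuid, IBC, facility list).
* The requested IBC value is clamped to the range reported by SCLP
* (lowest/unblocked IBC); only possible before any vcpu was created.
*/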
740 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
741 {
742 struct kvm_s390_vm_cpu_processor *proc;
743 u16 lowest_ibc, unblocked_ibc;
744 int ret = 0;
745
746 mutex_lock(&kvm->lock);
747 if (kvm->created_vcpus) {
748 ret = -EBUSY;
749 goto out;
750 }
751 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
752 if (!proc) {
753 ret = -ENOMEM;
754 goto out;
755 }
756 if (!copy_from_user(proc, (void __user *)attr->addr,
757 sizeof(*proc))) {
758 kvm->arch.model.cpuid = proc->cpuid;
759 lowest_ibc = sclp.ibc >> 16 & 0xfff;
760 unblocked_ibc = sclp.ibc & 0xfff;
761 if (lowest_ibc) {
762 if (proc->ibc > unblocked_ibc)
763 kvm->arch.model.ibc = unblocked_ibc;
764 else if (proc->ibc < lowest_ibc)
765 kvm->arch.model.ibc = lowest_ibc;
766 else
767 kvm->arch.model.ibc = proc->ibc;
768 }
769 memcpy(kvm->arch.model.fac_list, proc->fac_list,
770 S390_ARCH_FAC_LIST_SIZE_BYTE);
771 } else
772 ret = -EFAULT;
773 kfree(proc);
774 out:
775 mutex_unlock(&kvm->lock);
776 return ret;
777 }
778
779 static int kvm_s390_set_processor_feat(struct kvm *kvm,
780 struct kvm_device_attr *attr)
781 {
782 struct kvm_s390_vm_cpu_feat data;
783 int ret = -EBUSY;
784
785 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
786 return -EFAULT;
787 if (!bitmap_subset((unsigned long *) data.feat,
788 kvm_s390_available_cpu_feat,
789 KVM_S390_VM_CPU_FEAT_NR_BITS))
790 return -EINVAL;
791
792 mutex_lock(&kvm->lock);
793 if (!atomic_read(&kvm->online_vcpus)) {
794 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
795 KVM_S390_VM_CPU_FEAT_NR_BITS);
796 ret = 0;
797 }
798 mutex_unlock(&kvm->lock);
799 return ret;
800 }
801
802 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
803 struct kvm_device_attr *attr)
804 {
805 /*
806 * Once supported by kernel + hw, we have to store the subfunctions
807 * in kvm->arch and remember that user space configured them.
808 */
809 return -ENXIO;
810 }
811
812 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
813 {
814 int ret = -ENXIO;
815
816 switch (attr->attr) {
817 case KVM_S390_VM_CPU_PROCESSOR:
818 ret = kvm_s390_set_processor(kvm, attr);
819 break;
820 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
821 ret = kvm_s390_set_processor_feat(kvm, attr);
822 break;
823 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
824 ret = kvm_s390_set_processor_subfunc(kvm, attr);
825 break;
826 }
827 return ret;
828 }
829
830 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
831 {
832 struct kvm_s390_vm_cpu_processor *proc;
833 int ret = 0;
834
835 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
836 if (!proc) {
837 ret = -ENOMEM;
838 goto out;
839 }
840 proc->cpuid = kvm->arch.model.cpuid;
841 proc->ibc = kvm->arch.model.ibc;
842 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
843 S390_ARCH_FAC_LIST_SIZE_BYTE);
844 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
845 ret = -EFAULT;
846 kfree(proc);
847 out:
848 return ret;
849 }
850
851 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
852 {
853 struct kvm_s390_vm_cpu_machine *mach;
854 int ret = 0;
855
856 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
857 if (!mach) {
858 ret = -ENOMEM;
859 goto out;
860 }
861 get_cpu_id((struct cpuid *) &mach->cpuid);
862 mach->ibc = sclp.ibc;
863 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
864 S390_ARCH_FAC_LIST_SIZE_BYTE);
865 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
866 S390_ARCH_FAC_LIST_SIZE_BYTE);
867 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
868 ret = -EFAULT;
869 kfree(mach);
870 out:
871 return ret;
872 }
873
874 static int kvm_s390_get_processor_feat(struct kvm *kvm,
875 struct kvm_device_attr *attr)
876 {
877 struct kvm_s390_vm_cpu_feat data;
878
879 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
880 KVM_S390_VM_CPU_FEAT_NR_BITS);
881 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
882 return -EFAULT;
883 return 0;
884 }
885
886 static int kvm_s390_get_machine_feat(struct kvm *kvm,
887 struct kvm_device_attr *attr)
888 {
889 struct kvm_s390_vm_cpu_feat data;
890
891 bitmap_copy((unsigned long *) data.feat,
892 kvm_s390_available_cpu_feat,
893 KVM_S390_VM_CPU_FEAT_NR_BITS);
894 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
895 return -EFAULT;
896 return 0;
897 }
898
899 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
900 struct kvm_device_attr *attr)
901 {
902 /*
903 * Once we can actually configure subfunctions (kernel + hw support),
904 * we have to check if they were already set by user space, if so copy
905 * them from kvm->arch.
906 */
907 return -ENXIO;
908 }
909
910 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
911 struct kvm_device_attr *attr)
912 {
913 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
914 sizeof(struct kvm_s390_vm_cpu_subfunc)))
915 return -EFAULT;
916 return 0;
917 }
918 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
919 {
920 int ret = -ENXIO;
921
922 switch (attr->attr) {
923 case KVM_S390_VM_CPU_PROCESSOR:
924 ret = kvm_s390_get_processor(kvm, attr);
925 break;
926 case KVM_S390_VM_CPU_MACHINE:
927 ret = kvm_s390_get_machine(kvm, attr);
928 break;
929 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
930 ret = kvm_s390_get_processor_feat(kvm, attr);
931 break;
932 case KVM_S390_VM_CPU_MACHINE_FEAT:
933 ret = kvm_s390_get_machine_feat(kvm, attr);
934 break;
935 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
936 ret = kvm_s390_get_processor_subfunc(kvm, attr);
937 break;
938 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
939 ret = kvm_s390_get_machine_subfunc(kvm, attr);
940 break;
941 }
942 return ret;
943 }
944
945 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
946 {
947 int ret;
948
949 switch (attr->group) {
950 case KVM_S390_VM_MEM_CTRL:
951 ret = kvm_s390_set_mem_control(kvm, attr);
952 break;
953 case KVM_S390_VM_TOD:
954 ret = kvm_s390_set_tod(kvm, attr);
955 break;
956 case KVM_S390_VM_CPU_MODEL:
957 ret = kvm_s390_set_cpu_model(kvm, attr);
958 break;
959 case KVM_S390_VM_CRYPTO:
960 ret = kvm_s390_vm_set_crypto(kvm, attr);
961 break;
962 default:
963 ret = -ENXIO;
964 break;
965 }
966
967 return ret;
968 }
969
970 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
971 {
972 int ret;
973
974 switch (attr->group) {
975 case KVM_S390_VM_MEM_CTRL:
976 ret = kvm_s390_get_mem_control(kvm, attr);
977 break;
978 case KVM_S390_VM_TOD:
979 ret = kvm_s390_get_tod(kvm, attr);
980 break;
981 case KVM_S390_VM_CPU_MODEL:
982 ret = kvm_s390_get_cpu_model(kvm, attr);
983 break;
984 default:
985 ret = -ENXIO;
986 break;
987 }
988
989 return ret;
990 }
991
992 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
993 {
994 int ret;
995
996 switch (attr->group) {
997 case KVM_S390_VM_MEM_CTRL:
998 switch (attr->attr) {
999 case KVM_S390_VM_MEM_ENABLE_CMMA:
1000 case KVM_S390_VM_MEM_CLR_CMMA:
1001 ret = sclp.has_cmma ? 0 : -ENXIO;
1002 break;
1003 case KVM_S390_VM_MEM_LIMIT_SIZE:
1004 ret = 0;
1005 break;
1006 default:
1007 ret = -ENXIO;
1008 break;
1009 }
1010 break;
1011 case KVM_S390_VM_TOD:
1012 switch (attr->attr) {
1013 case KVM_S390_VM_TOD_LOW:
1014 case KVM_S390_VM_TOD_HIGH:
1015 ret = 0;
1016 break;
1017 default:
1018 ret = -ENXIO;
1019 break;
1020 }
1021 break;
1022 case KVM_S390_VM_CPU_MODEL:
1023 switch (attr->attr) {
1024 case KVM_S390_VM_CPU_PROCESSOR:
1025 case KVM_S390_VM_CPU_MACHINE:
1026 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1027 case KVM_S390_VM_CPU_MACHINE_FEAT:
1028 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1029 ret = 0;
1030 break;
1031 /* configuring subfunctions is not supported yet */
1032 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1033 default:
1034 ret = -ENXIO;
1035 break;
1036 }
1037 break;
1038 case KVM_S390_VM_CRYPTO:
1039 switch (attr->attr) {
1040 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1041 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1042 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1043 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1044 ret = 0;
1045 break;
1046 default:
1047 ret = -ENXIO;
1048 break;
1049 }
1050 break;
1051 default:
1052 ret = -ENXIO;
1053 break;
1054 }
1055
1056 return ret;
1057 }
1058
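/*
* Read the storage keys for a range of guest frames into a user buffer.
* If the guest does not use storage keys at all, no copy is done and
* KVM_S390_GET_SKEYS_NONE is returned instead.
*/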
1059 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1060 {
1061 uint8_t *keys;
1062 uint64_t hva;
1063 int i, r = 0;
1064
1065 if (args->flags != 0)
1066 return -EINVAL;
1067
1068 /* Is this guest using storage keys? */
1069 if (!mm_use_skey(current->mm))
1070 return KVM_S390_GET_SKEYS_NONE;
1071
1072 /* Enforce sane limit on memory allocation */
1073 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1074 return -EINVAL;
1075
1076 keys = kmalloc_array(args->count, sizeof(uint8_t),
1077 GFP_KERNEL | __GFP_NOWARN);
1078 if (!keys)
1079 keys = vmalloc(sizeof(uint8_t) * args->count);
1080 if (!keys)
1081 return -ENOMEM;
1082
1083 down_read(&current->mm->mmap_sem);
1084 for (i = 0; i < args->count; i++) {
1085 hva = gfn_to_hva(kvm, args->start_gfn + i);
1086 if (kvm_is_error_hva(hva)) {
1087 r = -EFAULT;
1088 break;
1089 }
1090
1091 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1092 if (r)
1093 break;
1094 }
1095 up_read(&current->mm->mmap_sem);
1096
1097 if (!r) {
1098 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1099 sizeof(uint8_t) * args->count);
1100 if (r)
1101 r = -EFAULT;
1102 }
1103
1104 kvfree(keys);
1105 return r;
1106 }
1107
1108 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1109 {
1110 uint8_t *keys;
1111 uint64_t hva;
1112 int i, r = 0;
1113
1114 if (args->flags != 0)
1115 return -EINVAL;
1116
1117 /* Enforce sane limit on memory allocation */
1118 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1119 return -EINVAL;
1120
1121 keys = kmalloc_array(args->count, sizeof(uint8_t),
1122 GFP_KERNEL | __GFP_NOWARN);
1123 if (!keys)
1124 keys = vmalloc(sizeof(uint8_t) * args->count);
1125 if (!keys)
1126 return -ENOMEM;
1127
1128 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1129 sizeof(uint8_t) * args->count);
1130 if (r) {
1131 r = -EFAULT;
1132 goto out;
1133 }
1134
1135 /* Enable storage key handling for the guest */
1136 r = s390_enable_skey();
1137 if (r)
1138 goto out;
1139
1140 down_read(&current->mm->mmap_sem);
1141 for (i = 0; i < args->count; i++) {
1142 hva = gfn_to_hva(kvm, args->start_gfn + i);
1143 if (kvm_is_error_hva(hva)) {
1144 r = -EFAULT;
1145 break;
1146 }
1147
1148 /* Lowest order bit is reserved */
1149 if (keys[i] & 0x01) {
1150 r = -EINVAL;
1151 break;
1152 }
1153
1154 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1155 if (r)
1156 break;
1157 }
1158 up_read(&current->mm->mmap_sem);
1159 out:
1160 kvfree(keys);
1161 return r;
1162 }
1163
1164 long kvm_arch_vm_ioctl(struct file *filp,
1165 unsigned int ioctl, unsigned long arg)
1166 {
1167 struct kvm *kvm = filp->private_data;
1168 void __user *argp = (void __user *)arg;
1169 struct kvm_device_attr attr;
1170 int r;
1171
1172 switch (ioctl) {
1173 case KVM_S390_INTERRUPT: {
1174 struct kvm_s390_interrupt s390int;
1175
1176 r = -EFAULT;
1177 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1178 break;
1179 r = kvm_s390_inject_vm(kvm, &s390int);
1180 break;
1181 }
1182 case KVM_ENABLE_CAP: {
1183 struct kvm_enable_cap cap;
1184 r = -EFAULT;
1185 if (copy_from_user(&cap, argp, sizeof(cap)))
1186 break;
1187 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1188 break;
1189 }
1190 case KVM_CREATE_IRQCHIP: {
1191 struct kvm_irq_routing_entry routing;
1192
1193 r = -EINVAL;
1194 if (kvm->arch.use_irqchip) {
1195 /* Set up dummy routing. */
1196 memset(&routing, 0, sizeof(routing));
1197 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1198 }
1199 break;
1200 }
1201 case KVM_SET_DEVICE_ATTR: {
1202 r = -EFAULT;
1203 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1204 break;
1205 r = kvm_s390_vm_set_attr(kvm, &attr);
1206 break;
1207 }
1208 case KVM_GET_DEVICE_ATTR: {
1209 r = -EFAULT;
1210 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1211 break;
1212 r = kvm_s390_vm_get_attr(kvm, &attr);
1213 break;
1214 }
1215 case KVM_HAS_DEVICE_ATTR: {
1216 r = -EFAULT;
1217 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1218 break;
1219 r = kvm_s390_vm_has_attr(kvm, &attr);
1220 break;
1221 }
1222 case KVM_S390_GET_SKEYS: {
1223 struct kvm_s390_skeys args;
1224
1225 r = -EFAULT;
1226 if (copy_from_user(&args, argp,
1227 sizeof(struct kvm_s390_skeys)))
1228 break;
1229 r = kvm_s390_get_skeys(kvm, &args);
1230 break;
1231 }
1232 case KVM_S390_SET_SKEYS: {
1233 struct kvm_s390_skeys args;
1234
1235 r = -EFAULT;
1236 if (copy_from_user(&args, argp,
1237 sizeof(struct kvm_s390_skeys)))
1238 break;
1239 r = kvm_s390_set_skeys(kvm, &args);
1240 break;
1241 }
1242 default:
1243 r = -ENOTTY;
1244 }
1245
1246 return r;
1247 }
1248
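/*
* Query the AP (adjunct processor / crypto) configuration via PQAP(QCI)
* into a 128-byte buffer; returns the condition code of the instruction.
*/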
1249 static int kvm_s390_query_ap_config(u8 *config)
1250 {
1251 u32 fcn_code = 0x04000000UL;
1252 u32 cc = 0;
1253
1254 memset(config, 0, 128);
1255 asm volatile(
1256 "lgr 0,%1\n"
1257 "lgr 2,%2\n"
1258 ".long 0xb2af0000\n" /* PQAP(QCI) */
1259 "0: ipm %0\n"
1260 "srl %0,28\n"
1261 "1:\n"
1262 EX_TABLE(0b, 1b)
1263 : "+r" (cc)
1264 : "r" (fcn_code), "r" (config)
1265 : "cc", "0", "2", "memory"
1266 );
1267
1268 return cc;
1269 }
1270
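/* Check whether APXA (AP extended addressing) is present in the QCI info. */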
1271 static int kvm_s390_apxa_installed(void)
1272 {
1273 u8 config[128];
1274 int cc;
1275
1276 if (test_facility(12)) {
1277 cc = kvm_s390_query_ap_config(config);
1278
1279 if (cc)
1280 pr_err("PQAP(QCI) failed with cc=%d", cc);
1281 else
1282 return config[0] & 0x40;
1283 }
1284
1285 return 0;
1286 }
1287
1288 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1289 {
1290 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1291
1292 if (kvm_s390_apxa_installed())
1293 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1294 else
1295 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1296 }
1297
1298 static u64 kvm_s390_get_initial_cpuid(void)
1299 {
1300 struct cpuid cpuid;
1301
1302 get_cpu_id(&cpuid);
1303 cpuid.version = 0xff;
1304 return *((u64 *) &cpuid);
1305 }
1306
1307 static void kvm_s390_crypto_init(struct kvm *kvm)
1308 {
1309 if (!test_kvm_facility(kvm, 76))
1310 return;
1311
1312 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1313 kvm_s390_set_crycb_format(kvm);
1314
1315 /* Enable AES/DEA protected key functions by default */
1316 kvm->arch.crypto.aes_kw = 1;
1317 kvm->arch.crypto.dea_kw = 1;
1318 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1319 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1320 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1321 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1322 }
1323
1324 static void sca_dispose(struct kvm *kvm)
1325 {
1326 if (kvm->arch.use_esca)
1327 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1328 else
1329 free_page((unsigned long)(kvm->arch.sca));
1330 kvm->arch.sca = NULL;
1331 }
1332
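/*
* Create the per-VM state: the SCA (placed at a staggered offset within
* its page), the debug feature, the SIE satellite page holding the
* facility mask/list, the crypto control block, the floating interrupt
* lists and, unless this is a ucontrol VM, the guest address space
* (gmap) bounded by mem_limit.
*/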
1333 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1334 {
1335 gfp_t alloc_flags = GFP_KERNEL;
1336 int i, rc;
1337 char debug_name[16];
1338 static unsigned long sca_offset;
1339
1340 rc = -EINVAL;
1341 #ifdef CONFIG_KVM_S390_UCONTROL
1342 if (type & ~KVM_VM_S390_UCONTROL)
1343 goto out_err;
1344 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1345 goto out_err;
1346 #else
1347 if (type)
1348 goto out_err;
1349 #endif
1350
1351 rc = s390_enable_sie();
1352 if (rc)
1353 goto out_err;
1354
1355 rc = -ENOMEM;
1356
1357 ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1358
1359 kvm->arch.use_esca = 0; /* start with basic SCA */
1360 if (!sclp.has_64bscao)
1361 alloc_flags |= GFP_DMA;
1362 rwlock_init(&kvm->arch.sca_lock);
1363 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1364 if (!kvm->arch.sca)
1365 goto out_err;
1366 spin_lock(&kvm_lock);
1367 sca_offset += 16;
1368 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1369 sca_offset = 0;
1370 kvm->arch.sca = (struct bsca_block *)
1371 ((char *) kvm->arch.sca + sca_offset);
1372 spin_unlock(&kvm_lock);
1373
1374 sprintf(debug_name, "kvm-%u", current->pid);
1375
1376 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1377 if (!kvm->arch.dbf)
1378 goto out_err;
1379
1380 kvm->arch.sie_page2 =
1381 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1382 if (!kvm->arch.sie_page2)
1383 goto out_err;
1384
1385 /* Populate the facility mask initially. */
1386 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1387 S390_ARCH_FAC_LIST_SIZE_BYTE);
1388 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1389 if (i < kvm_s390_fac_list_mask_size())
1390 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1391 else
1392 kvm->arch.model.fac_mask[i] = 0UL;
1393 }
1394
1395 /* Populate the facility list initially. */
1396 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1397 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1398 S390_ARCH_FAC_LIST_SIZE_BYTE);
1399
1400 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1401 set_kvm_facility(kvm->arch.model.fac_list, 74);
1402
1403 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1404 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1405
1406 kvm_s390_crypto_init(kvm);
1407
1408 spin_lock_init(&kvm->arch.float_int.lock);
1409 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1410 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1411 init_waitqueue_head(&kvm->arch.ipte_wq);
1412 mutex_init(&kvm->arch.ipte_mutex);
1413
1414 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1415 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1416
1417 if (type & KVM_VM_S390_UCONTROL) {
1418 kvm->arch.gmap = NULL;
1419 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1420 } else {
1421 if (sclp.hamax == U64_MAX)
1422 kvm->arch.mem_limit = TASK_MAX_SIZE;
1423 else
1424 kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1425 sclp.hamax + 1);
1426 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1427 if (!kvm->arch.gmap)
1428 goto out_err;
1429 kvm->arch.gmap->private = kvm;
1430 kvm->arch.gmap->pfault_enabled = 0;
1431 }
1432
1433 kvm->arch.css_support = 0;
1434 kvm->arch.use_irqchip = 0;
1435 kvm->arch.epoch = 0;
1436
1437 spin_lock_init(&kvm->arch.start_stop_lock);
1438 kvm_s390_vsie_init(kvm);
1439 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1440
1441 return 0;
1442 out_err:
1443 free_page((unsigned long)kvm->arch.sie_page2);
1444 debug_unregister(kvm->arch.dbf);
1445 sca_dispose(kvm);
1446 KVM_EVENT(3, "creation of vm failed: %d", rc);
1447 return rc;
1448 }
1449
1450 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1451 {
1452 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1453 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1454 kvm_s390_clear_local_irqs(vcpu);
1455 kvm_clear_async_pf_completion_queue(vcpu);
1456 if (!kvm_is_ucontrol(vcpu->kvm))
1457 sca_del_vcpu(vcpu);
1458
1459 if (kvm_is_ucontrol(vcpu->kvm))
1460 gmap_remove(vcpu->arch.gmap);
1461
1462 if (vcpu->kvm->arch.use_cmma)
1463 kvm_s390_vcpu_unsetup_cmma(vcpu);
1464 free_page((unsigned long)(vcpu->arch.sie_block));
1465
1466 kvm_vcpu_uninit(vcpu);
1467 kmem_cache_free(kvm_vcpu_cache, vcpu);
1468 }
1469
1470 static void kvm_free_vcpus(struct kvm *kvm)
1471 {
1472 unsigned int i;
1473 struct kvm_vcpu *vcpu;
1474
1475 kvm_for_each_vcpu(i, vcpu, kvm)
1476 kvm_arch_vcpu_destroy(vcpu);
1477
1478 mutex_lock(&kvm->lock);
1479 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1480 kvm->vcpus[i] = NULL;
1481
1482 atomic_set(&kvm->online_vcpus, 0);
1483 mutex_unlock(&kvm->lock);
1484 }
1485
1486 void kvm_arch_destroy_vm(struct kvm *kvm)
1487 {
1488 kvm_free_vcpus(kvm);
1489 sca_dispose(kvm);
1490 debug_unregister(kvm->arch.dbf);
1491 free_page((unsigned long)kvm->arch.sie_page2);
1492 if (!kvm_is_ucontrol(kvm))
1493 gmap_remove(kvm->arch.gmap);
1494 kvm_s390_destroy_adapters(kvm);
1495 kvm_s390_clear_float_irqs(kvm);
1496 kvm_s390_vsie_destroy(kvm);
1497 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1498 }
1499
1500 /* Section: vcpu related */
1501 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1502 {
1503 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1504 if (!vcpu->arch.gmap)
1505 return -ENOMEM;
1506 vcpu->arch.gmap->private = vcpu->kvm;
1507
1508 return 0;
1509 }
1510
1511 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1512 {
1513 read_lock(&vcpu->kvm->arch.sca_lock);
1514 if (vcpu->kvm->arch.use_esca) {
1515 struct esca_block *sca = vcpu->kvm->arch.sca;
1516
1517 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1518 sca->cpu[vcpu->vcpu_id].sda = 0;
1519 } else {
1520 struct bsca_block *sca = vcpu->kvm->arch.sca;
1521
1522 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1523 sca->cpu[vcpu->vcpu_id].sda = 0;
1524 }
1525 read_unlock(&vcpu->kvm->arch.sca_lock);
1526 }
1527
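/*
* Enter the vcpu's SIE control block into its (E)SCA slot (sda) and
* point the SIE block at the SCA origin (scaoh/scaol); for the extended
* SCA, ecb2 bit 0x04 additionally selects the ESCA format.
*/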
1528 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1529 {
1530 read_lock(&vcpu->kvm->arch.sca_lock);
1531 if (vcpu->kvm->arch.use_esca) {
1532 struct esca_block *sca = vcpu->kvm->arch.sca;
1533
1534 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1535 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1536 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1537 vcpu->arch.sie_block->ecb2 |= 0x04U;
1538 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1539 } else {
1540 struct bsca_block *sca = vcpu->kvm->arch.sca;
1541
1542 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1543 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1544 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1545 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1546 }
1547 read_unlock(&vcpu->kvm->arch.sca_lock);
1548 }
1549
1550 /* Basic SCA to Extended SCA data copy routines */
1551 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1552 {
1553 d->sda = s->sda;
1554 d->sigp_ctrl.c = s->sigp_ctrl.c;
1555 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1556 }
1557
1558 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1559 {
1560 int i;
1561
1562 d->ipte_control = s->ipte_control;
1563 d->mcn[0] = s->mcn;
1564 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1565 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1566 }
1567
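/*
* Replace the basic SCA by an extended SCA while all vcpus are blocked,
* copying the existing entries and rewriting the SCA origin in every
* SIE control block.
*/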
1568 static int sca_switch_to_extended(struct kvm *kvm)
1569 {
1570 struct bsca_block *old_sca = kvm->arch.sca;
1571 struct esca_block *new_sca;
1572 struct kvm_vcpu *vcpu;
1573 unsigned int vcpu_idx;
1574 u32 scaol, scaoh;
1575
1576 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1577 if (!new_sca)
1578 return -ENOMEM;
1579
1580 scaoh = (u32)((u64)(new_sca) >> 32);
1581 scaol = (u32)(u64)(new_sca) & ~0x3fU;
1582
1583 kvm_s390_vcpu_block_all(kvm);
1584 write_lock(&kvm->arch.sca_lock);
1585
1586 sca_copy_b_to_e(new_sca, old_sca);
1587
1588 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1589 vcpu->arch.sie_block->scaoh = scaoh;
1590 vcpu->arch.sie_block->scaol = scaol;
1591 vcpu->arch.sie_block->ecb2 |= 0x04U;
1592 }
1593 kvm->arch.sca = new_sca;
1594 kvm->arch.use_esca = 1;
1595
1596 write_unlock(&kvm->arch.sca_lock);
1597 kvm_s390_vcpu_unblock_all(kvm);
1598
1599 free_page((unsigned long)old_sca);
1600
1601 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1602 old_sca, kvm->arch.sca);
1603 return 0;
1604 }
1605
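/*
* A vcpu id beyond the basic SCA slots requires switching to the
* extended SCA first, which in turn needs ESCA and 64-bit-SCAO support.
*/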
1606 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1607 {
1608 int rc;
1609
1610 if (id < KVM_S390_BSCA_CPU_SLOTS)
1611 return true;
1612 if (!sclp.has_esca || !sclp.has_64bscao)
1613 return false;
1614
1615 mutex_lock(&kvm->lock);
1616 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1617 mutex_unlock(&kvm->lock);
1618
1619 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1620 }
1621
1622 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1623 {
1624 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1625 kvm_clear_async_pf_completion_queue(vcpu);
1626 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1627 KVM_SYNC_GPRS |
1628 KVM_SYNC_ACRS |
1629 KVM_SYNC_CRS |
1630 KVM_SYNC_ARCH0 |
1631 KVM_SYNC_PFAULT;
1632 if (test_kvm_facility(vcpu->kvm, 64))
1633 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1634 /* fprs can be synchronized via vrs, even if the guest has no vx. With
1635 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1636 */
1637 if (MACHINE_HAS_VX)
1638 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1639 else
1640 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1641
1642 if (kvm_is_ucontrol(vcpu->kvm))
1643 return __kvm_ucontrol_vcpu_init(vcpu);
1644
1645 return 0;
1646 }
1647
1648 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1649 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1650 {
1651 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1652 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1653 vcpu->arch.cputm_start = get_tod_clock_fast();
1654 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1655 }
1656
1657 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1658 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1659 {
1660 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1661 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1662 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1663 vcpu->arch.cputm_start = 0;
1664 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1665 }
1666
1667 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1668 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1669 {
1670 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1671 vcpu->arch.cputm_enabled = true;
1672 __start_cpu_timer_accounting(vcpu);
1673 }
1674
1675 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1676 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1677 {
1678 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1679 __stop_cpu_timer_accounting(vcpu);
1680 vcpu->arch.cputm_enabled = false;
1681 }
1682
1683 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1684 {
1685 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1686 __enable_cpu_timer_accounting(vcpu);
1687 preempt_enable();
1688 }
1689
1690 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1691 {
1692 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1693 __disable_cpu_timer_accounting(vcpu);
1694 preempt_enable();
1695 }
1696
1697 /* set the cpu timer - may only be called from the VCPU thread itself */
1698 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1699 {
1700 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1701 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1702 if (vcpu->arch.cputm_enabled)
1703 vcpu->arch.cputm_start = get_tod_clock_fast();
1704 vcpu->arch.sie_block->cputm = cputm;
1705 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1706 preempt_enable();
1707 }
1708
1709 /* update and get the cpu timer - can also be called from other VCPU threads */
1710 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1711 {
1712 unsigned int seq;
1713 __u64 value;
1714
1715 if (unlikely(!vcpu->arch.cputm_enabled))
1716 return vcpu->arch.sie_block->cputm;
1717
1718 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1719 do {
1720 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1721 /*
1722 * If the writer would ever execute a read in the critical
1723 * section, e.g. in irq context, we have a deadlock.
1724 */
1725 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1726 value = vcpu->arch.sie_block->cputm;
1727 /* if cputm_start is 0, accounting is being started/stopped */
1728 if (likely(vcpu->arch.cputm_start))
1729 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1730 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1731 preempt_enable();
1732 return value;
1733 }
1734
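/*
* Switch from host to guest register context on vcpu_load: save the host
* FP/vector and access registers, install the guest ones, enable the
* guest's gmap and resume CPU timer accounting where enabled.
*/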
1735 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1736 {
1737 /* Save host register state */
1738 save_fpu_regs();
1739 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1740 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1741
1742 if (MACHINE_HAS_VX)
1743 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1744 else
1745 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1746 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1747 if (test_fp_ctl(current->thread.fpu.fpc))
1748 /* User space provided an invalid FPC, let's clear it */
1749 current->thread.fpu.fpc = 0;
1750
1751 save_access_regs(vcpu->arch.host_acrs);
1752 restore_access_regs(vcpu->run->s.regs.acrs);
1753 gmap_enable(vcpu->arch.enabled_gmap);
1754 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1755 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1756 __start_cpu_timer_accounting(vcpu);
1757 vcpu->cpu = cpu;
1758 }
1759
1760 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1761 {
1762 vcpu->cpu = -1;
1763 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1764 __stop_cpu_timer_accounting(vcpu);
1765 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1766 vcpu->arch.enabled_gmap = gmap_get_enabled();
1767 gmap_disable(vcpu->arch.enabled_gmap);
1768
1769 /* Save guest register state */
1770 save_fpu_regs();
1771 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1772
1773 /* Restore host register state */
1774 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1775 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1776
1777 save_access_regs(vcpu->run->s.regs.acrs);
1778 restore_access_regs(vcpu->arch.host_acrs);
1779 }
1780
1781 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1782 {
1783 /* this equals the initial cpu reset in the POP (Principles of Operation), but we don't switch to ESA */
1784 vcpu->arch.sie_block->gpsw.mask = 0UL;
1785 vcpu->arch.sie_block->gpsw.addr = 0UL;
1786 kvm_s390_set_prefix(vcpu, 0);
1787 kvm_s390_set_cpu_timer(vcpu, 0);
1788 vcpu->arch.sie_block->ckc = 0UL;
1789 vcpu->arch.sie_block->todpr = 0;
1790 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1791 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
1792 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1793 /* make sure the new fpc will be lazily loaded */
1794 save_fpu_regs();
1795 current->thread.fpu.fpc = 0;
1796 vcpu->arch.sie_block->gbea = 1;
1797 vcpu->arch.sie_block->pp = 0;
1798 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1799 kvm_clear_async_pf_completion_queue(vcpu);
1800 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1801 kvm_s390_vcpu_stop(vcpu);
1802 kvm_s390_clear_local_irqs(vcpu);
1803 }
1804
1805 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1806 {
1807 mutex_lock(&vcpu->kvm->lock);
1808 preempt_disable();
1809 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1810 preempt_enable();
1811 mutex_unlock(&vcpu->kvm->lock);
1812 if (!kvm_is_ucontrol(vcpu->kvm)) {
1813 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1814 sca_add_vcpu(vcpu);
1815 }
1816 /* make vcpu_load load the right gmap on the first trigger */
1817 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1818 }
1819
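/* Propagate the VM-wide key-wrapping setup into the vcpu's SIE block. */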
1820 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1821 {
1822 if (!test_kvm_facility(vcpu->kvm, 76))
1823 return;
1824
1825 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1826
1827 if (vcpu->kvm->arch.crypto.aes_kw)
1828 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1829 if (vcpu->kvm->arch.crypto.dea_kw)
1830 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1831
1832 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1833 }
1834
1835 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1836 {
1837 free_page(vcpu->arch.sie_block->cbrlo);
1838 vcpu->arch.sie_block->cbrlo = 0;
1839 }
1840
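/*
* Allocate the per-vcpu buffer referenced by cbrlo and enable CMMA
* handling in ecb2 (bit 0x80); the PFMF interpretation bit (0x08) is
* cleared at the same time.
*/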
1841 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1842 {
1843 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1844 if (!vcpu->arch.sie_block->cbrlo)
1845 return -ENOMEM;
1846
1847 vcpu->arch.sie_block->ecb2 |= 0x80;
1848 vcpu->arch.sie_block->ecb2 &= ~0x08;
1849 return 0;
1850 }
1851
1852 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1853 {
1854 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1855
1856 vcpu->arch.sie_block->ibc = model->ibc;
1857 if (test_kvm_facility(vcpu->kvm, 7))
1858 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1859 }
1860
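/*
* Initialize the SIE control block from the VM's CPU model and from the
* facilities the host provides: CPU state flags, the ECB/ECA/ECD
* interpretation bits, CMMA (if enabled for the VM), the clock
* comparator timer and the crypto setup.
*/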
1861 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1862 {
1863 int rc = 0;
1864
1865 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1866 CPUSTAT_SM |
1867 CPUSTAT_STOPPED);
1868
1869 if (test_kvm_facility(vcpu->kvm, 78))
1870 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1871 else if (test_kvm_facility(vcpu->kvm, 8))
1872 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1873
1874 kvm_s390_vcpu_setup_model(vcpu);
1875
1876 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1877 if (MACHINE_HAS_ESOP)
1878 vcpu->arch.sie_block->ecb |= 0x02;
1879 if (test_kvm_facility(vcpu->kvm, 9))
1880 vcpu->arch.sie_block->ecb |= 0x04;
1881 if (test_kvm_facility(vcpu->kvm, 73))
1882 vcpu->arch.sie_block->ecb |= 0x10;
1883
1884 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1885 vcpu->arch.sie_block->ecb2 |= 0x08;
1886 vcpu->arch.sie_block->eca = 0x1002000U;
1887 if (sclp.has_cei)
1888 vcpu->arch.sie_block->eca |= 0x80000000U;
1889 if (sclp.has_ib)
1890 vcpu->arch.sie_block->eca |= 0x40000000U;
1891 if (sclp.has_siif)
1892 vcpu->arch.sie_block->eca |= 1;
1893 if (sclp.has_sigpif)
1894 vcpu->arch.sie_block->eca |= 0x10000000U;
1895 if (test_kvm_facility(vcpu->kvm, 64))
1896 vcpu->arch.sie_block->ecb3 |= 0x01;
1897 if (test_kvm_facility(vcpu->kvm, 129)) {
1898 vcpu->arch.sie_block->eca |= 0x00020000;
1899 vcpu->arch.sie_block->ecd |= 0x20000000;
1900 }
1901 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1902 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1903 if (test_kvm_facility(vcpu->kvm, 74))
1904 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1905
1906 if (vcpu->kvm->arch.use_cmma) {
1907 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1908 if (rc)
1909 return rc;
1910 }
1911 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1912 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1913
1914 kvm_s390_vcpu_crypto_setup(vcpu);
1915
1916 return rc;
1917 }
1918
1919 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1920 unsigned int id)
1921 {
1922 struct kvm_vcpu *vcpu;
1923 struct sie_page *sie_page;
1924 int rc = -EINVAL;
1925
1926 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1927 goto out;
1928
1929 rc = -ENOMEM;
1930
1931 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1932 if (!vcpu)
1933 goto out;
1934
1935 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1936 if (!sie_page)
1937 goto out_free_cpu;
1938
1939 vcpu->arch.sie_block = &sie_page->sie_block;
1940 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1941
1942 /* the real guest size will always be smaller than msl */
1943 vcpu->arch.sie_block->mso = 0;
1944 vcpu->arch.sie_block->msl = sclp.hamax;
1945
1946 vcpu->arch.sie_block->icpua = id;
1947 spin_lock_init(&vcpu->arch.local_int.lock);
1948 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1949 vcpu->arch.local_int.wq = &vcpu->wq;
1950 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1951 seqcount_init(&vcpu->arch.cputm_seqcount);
1952
1953 rc = kvm_vcpu_init(vcpu, kvm, id);
1954 if (rc)
1955 goto out_free_sie_block;
1956 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
1957 vcpu->arch.sie_block);
1958 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1959
1960 return vcpu;
1961 out_free_sie_block:
1962 free_page((unsigned long)(vcpu->arch.sie_block));
1963 out_free_cpu:
1964 kmem_cache_free(kvm_vcpu_cache, vcpu);
1965 out:
1966 return ERR_PTR(rc);
1967 }
1968
1969 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1970 {
1971 return kvm_s390_vcpu_has_irq(vcpu, 0);
1972 }
1973
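/*
 * Blocking and requesting work by setting a flag in the sie_block's
 * prog20 field, which is checked on the SIE entry path, and then kicking
 * the vcpu out of SIE with exit_sie(); the vcpu cannot (re)enter the
 * guest until the flag is cleared again.
 */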
1974 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1975 {
1976 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1977 exit_sie(vcpu);
1978 }
1979
1980 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1981 {
1982 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1983 }
1984
1985 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1986 {
1987 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1988 exit_sie(vcpu);
1989 }
1990
1991 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1992 {
1993 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1994 }
1995
1996 /*
1997 * Kick a guest cpu out of SIE and wait until SIE is not running.
1998 * If the CPU is not running (e.g. waiting as idle), the function
1999 * returns immediately.
 */
2000 void exit_sie(struct kvm_vcpu *vcpu)
2001 {
2002 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2003 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2004 cpu_relax();
2005 }
2006
2007 /* Kick a guest cpu out of SIE to process a request synchronously */
2008 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2009 {
2010 kvm_make_request(req, vcpu);
2011 kvm_s390_vcpu_request(vcpu);
2012 }
2013
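/*
 * gmap invalidation notifier: if the invalidated range overlaps a vcpu's
 * two-page prefix area, request KVM_REQ_MMU_RELOAD so that the ipte
 * notifier for the prefix pages gets re-armed, see
 * kvm_s390_handle_requests().
 */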
2014 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2015 unsigned long end)
2016 {
2017 struct kvm *kvm = gmap->private;
2018 struct kvm_vcpu *vcpu;
2019 unsigned long prefix;
2020 int i;
2021
2022 if (gmap_is_shadow(gmap))
2023 return;
2024 if (start >= 1UL << 31)
2025 /* We are only interested in prefix pages */
2026 return;
2027 kvm_for_each_vcpu(i, vcpu, kvm) {
2028 /* match against both prefix pages */
2029 prefix = kvm_s390_get_prefix(vcpu);
2030 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2031 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2032 start, end);
2033 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2034 }
2035 }
2036 }
2037
2038 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2039 {
2040 /* kvm common code refers to this, but never calls it */
2041 BUG();
2042 return 0;
2043 }
2044
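/*
 * KVM_GET_ONE_REG / KVM_SET_ONE_REG handlers for the s390 registers that
 * are not covered by the regular regs/sregs ioctls (TOD programmable
 * register, epoch difference, CPU timer, clock comparator, pfault
 * parameters, program parameter, breaking-event address).
 * Userspace usage (sketch, assuming a vcpu fd and a u64 value):
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&value,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */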
2045 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2046 struct kvm_one_reg *reg)
2047 {
2048 int r = -EINVAL;
2049
2050 switch (reg->id) {
2051 case KVM_REG_S390_TODPR:
2052 r = put_user(vcpu->arch.sie_block->todpr,
2053 (u32 __user *)reg->addr);
2054 break;
2055 case KVM_REG_S390_EPOCHDIFF:
2056 r = put_user(vcpu->arch.sie_block->epoch,
2057 (u64 __user *)reg->addr);
2058 break;
2059 case KVM_REG_S390_CPU_TIMER:
2060 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2061 (u64 __user *)reg->addr);
2062 break;
2063 case KVM_REG_S390_CLOCK_COMP:
2064 r = put_user(vcpu->arch.sie_block->ckc,
2065 (u64 __user *)reg->addr);
2066 break;
2067 case KVM_REG_S390_PFTOKEN:
2068 r = put_user(vcpu->arch.pfault_token,
2069 (u64 __user *)reg->addr);
2070 break;
2071 case KVM_REG_S390_PFCOMPARE:
2072 r = put_user(vcpu->arch.pfault_compare,
2073 (u64 __user *)reg->addr);
2074 break;
2075 case KVM_REG_S390_PFSELECT:
2076 r = put_user(vcpu->arch.pfault_select,
2077 (u64 __user *)reg->addr);
2078 break;
2079 case KVM_REG_S390_PP:
2080 r = put_user(vcpu->arch.sie_block->pp,
2081 (u64 __user *)reg->addr);
2082 break;
2083 case KVM_REG_S390_GBEA:
2084 r = put_user(vcpu->arch.sie_block->gbea,
2085 (u64 __user *)reg->addr);
2086 break;
2087 default:
2088 break;
2089 }
2090
2091 return r;
2092 }
2093
2094 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2095 struct kvm_one_reg *reg)
2096 {
2097 int r = -EINVAL;
2098 __u64 val;
2099
2100 switch (reg->id) {
2101 case KVM_REG_S390_TODPR:
2102 r = get_user(vcpu->arch.sie_block->todpr,
2103 (u32 __user *)reg->addr);
2104 break;
2105 case KVM_REG_S390_EPOCHDIFF:
2106 r = get_user(vcpu->arch.sie_block->epoch,
2107 (u64 __user *)reg->addr);
2108 break;
2109 case KVM_REG_S390_CPU_TIMER:
2110 r = get_user(val, (u64 __user *)reg->addr);
2111 if (!r)
2112 kvm_s390_set_cpu_timer(vcpu, val);
2113 break;
2114 case KVM_REG_S390_CLOCK_COMP:
2115 r = get_user(vcpu->arch.sie_block->ckc,
2116 (u64 __user *)reg->addr);
2117 break;
2118 case KVM_REG_S390_PFTOKEN:
2119 r = get_user(vcpu->arch.pfault_token,
2120 (u64 __user *)reg->addr);
2121 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2122 kvm_clear_async_pf_completion_queue(vcpu);
2123 break;
2124 case KVM_REG_S390_PFCOMPARE:
2125 r = get_user(vcpu->arch.pfault_compare,
2126 (u64 __user *)reg->addr);
2127 break;
2128 case KVM_REG_S390_PFSELECT:
2129 r = get_user(vcpu->arch.pfault_select,
2130 (u64 __user *)reg->addr);
2131 break;
2132 case KVM_REG_S390_PP:
2133 r = get_user(vcpu->arch.sie_block->pp,
2134 (u64 __user *)reg->addr);
2135 break;
2136 case KVM_REG_S390_GBEA:
2137 r = get_user(vcpu->arch.sie_block->gbea,
2138 (u64 __user *)reg->addr);
2139 break;
2140 default:
2141 break;
2142 }
2143
2144 return r;
2145 }
2146
2147 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2148 {
2149 kvm_s390_vcpu_initial_reset(vcpu);
2150 return 0;
2151 }
2152
2153 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2154 {
2155 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2156 return 0;
2157 }
2158
2159 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2160 {
2161 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2162 return 0;
2163 }
2164
2165 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2166 struct kvm_sregs *sregs)
2167 {
2168 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2169 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2170 restore_access_regs(vcpu->run->s.regs.acrs);
2171 return 0;
2172 }
2173
2174 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2175 struct kvm_sregs *sregs)
2176 {
2177 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2178 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2179 return 0;
2180 }
2181
2182 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2183 {
2184 /* make sure the new values will be lazily loaded */
2185 save_fpu_regs();
2186 if (test_fp_ctl(fpu->fpc))
2187 return -EINVAL;
2188 current->thread.fpu.fpc = fpu->fpc;
2189 if (MACHINE_HAS_VX)
2190 convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
2191 else
2192 memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
2193 return 0;
2194 }
2195
2196 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2197 {
2198 /* make sure we have the latest values */
2199 save_fpu_regs();
2200 if (MACHINE_HAS_VX)
2201 convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
2202 else
2203 memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
2204 fpu->fpc = current->thread.fpu.fpc;
2205 return 0;
2206 }
2207
2208 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2209 {
2210 int rc = 0;
2211
2212 if (!is_vcpu_stopped(vcpu))
2213 rc = -EBUSY;
2214 else {
2215 vcpu->run->psw_mask = psw.mask;
2216 vcpu->run->psw_addr = psw.addr;
2217 }
2218 return rc;
2219 }
2220
2221 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2222 struct kvm_translation *tr)
2223 {
2224 return -EINVAL; /* not implemented yet */
2225 }
2226
2227 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2228 KVM_GUESTDBG_USE_HW_BP | \
2229 KVM_GUESTDBG_ENABLE)
2230
2231 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2232 struct kvm_guest_debug *dbg)
2233 {
2234 int rc = 0;
2235
2236 vcpu->guest_debug = 0;
2237 kvm_s390_clear_bp_data(vcpu);
2238
2239 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2240 return -EINVAL;
2241 if (!sclp.has_gpere)
2242 return -EINVAL;
2243
2244 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2245 vcpu->guest_debug = dbg->control;
2246 /* enforce guest PER */
2247 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2248
2249 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2250 rc = kvm_s390_import_bp_data(vcpu, dbg);
2251 } else {
2252 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2253 vcpu->arch.guestdbg.last_bp = 0;
2254 }
2255
2256 if (rc) {
2257 vcpu->guest_debug = 0;
2258 kvm_s390_clear_bp_data(vcpu);
2259 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2260 }
2261
2262 return rc;
2263 }
2264
2265 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2266 struct kvm_mp_state *mp_state)
2267 {
2268 /* CHECK_STOP and LOAD are not supported yet */
2269 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2270 KVM_MP_STATE_OPERATING;
2271 }
2272
2273 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2274 struct kvm_mp_state *mp_state)
2275 {
2276 int rc = 0;
2277
2278 /* user space knows about this interface - let it control the state */
2279 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2280
2281 switch (mp_state->mp_state) {
2282 case KVM_MP_STATE_STOPPED:
2283 kvm_s390_vcpu_stop(vcpu);
2284 break;
2285 case KVM_MP_STATE_OPERATING:
2286 kvm_s390_vcpu_start(vcpu);
2287 break;
2288 case KVM_MP_STATE_LOAD:
2289 case KVM_MP_STATE_CHECK_STOP:
2290 /* fall through - CHECK_STOP and LOAD are not supported yet */
2291 default:
2292 rc = -ENXIO;
2293 }
2294
2295 return rc;
2296 }
2297
2298 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2299 {
2300 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2301 }
2302
2303 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2304 {
2305 retry:
2306 kvm_s390_vcpu_request_handled(vcpu);
2307 if (!vcpu->requests)
2308 return 0;
2309 /*
2310 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2311 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2312 * This ensures that the ipte instruction for this request has
2313 * already finished. We might race against a second unmapper that
2314 * wants to set the blocking bit. Let's just retry the request loop.
2315 */
2316 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2317 int rc;
2318 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2319 kvm_s390_get_prefix(vcpu),
2320 PAGE_SIZE * 2, PROT_WRITE);
2321 if (rc)
2322 return rc;
2323 goto retry;
2324 }
2325
2326 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2327 vcpu->arch.sie_block->ihcpu = 0xffff;
2328 goto retry;
2329 }
2330
2331 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2332 if (!ibs_enabled(vcpu)) {
2333 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2334 atomic_or(CPUSTAT_IBS,
2335 &vcpu->arch.sie_block->cpuflags);
2336 }
2337 goto retry;
2338 }
2339
2340 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2341 if (ibs_enabled(vcpu)) {
2342 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2343 atomic_andnot(CPUSTAT_IBS,
2344 &vcpu->arch.sie_block->cpuflags);
2345 }
2346 goto retry;
2347 }
2348
2349 /* nothing to do, just clear the request */
2350 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2351
2352 return 0;
2353 }
2354
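/*
 * Set the guest TOD clock: store the difference between the requested
 * guest TOD and the current host TOD as the epoch and propagate it to
 * every vcpu while all vcpus are blocked out of SIE.
 */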
2355 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2356 {
2357 struct kvm_vcpu *vcpu;
2358 int i;
2359
2360 mutex_lock(&kvm->lock);
2361 preempt_disable();
2362 kvm->arch.epoch = tod - get_tod_clock();
2363 kvm_s390_vcpu_block_all(kvm);
2364 kvm_for_each_vcpu(i, vcpu, kvm)
2365 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2366 kvm_s390_vcpu_unblock_all(kvm);
2367 preempt_enable();
2368 mutex_unlock(&kvm->lock);
2369 }
2370
2371 /**
2372 * kvm_arch_fault_in_page - fault-in guest page if necessary
2373 * @vcpu: The corresponding virtual cpu
2374 * @gpa: Guest physical address
2375 * @writable: Whether the page should be writable or not
2376 *
2377 * Make sure that a guest page has been faulted-in on the host.
2378 *
2379 * Return: Zero on success, negative error code otherwise.
2380 */
2381 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2382 {
2383 return gmap_fault(vcpu->arch.gmap, gpa,
2384 writable ? FAULT_FLAG_WRITE : 0);
2385 }
2386
2387 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2388 unsigned long token)
2389 {
2390 struct kvm_s390_interrupt inti;
2391 struct kvm_s390_irq irq;
2392
2393 if (start_token) {
2394 irq.u.ext.ext_params2 = token;
2395 irq.type = KVM_S390_INT_PFAULT_INIT;
2396 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2397 } else {
2398 inti.type = KVM_S390_INT_PFAULT_DONE;
2399 inti.parm64 = token;
2400 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2401 }
2402 }
2403
2404 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2405 struct kvm_async_pf *work)
2406 {
2407 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2408 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2409 }
2410
2411 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2412 struct kvm_async_pf *work)
2413 {
2414 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2415 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2416 }
2417
2418 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2419 struct kvm_async_pf *work)
2420 {
2421 /* s390 will always inject the page directly */
2422 }
2423
2424 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2425 {
2426 /*
2427 * s390 will always inject the page directly,
2428 * but we still want check_async_completion to clean up
2429 */
2430 return true;
2431 }
2432
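/*
 * Decide whether the current host fault may be handled asynchronously
 * via the pfault mechanism: this requires a valid pfault token, a PSW
 * that matches the pfault compare/select masks, external interrupts to
 * be enabled, no interrupt already pending, the 0x200 subclass bit in
 * CR0 (presumably the service-signal subclass mask) and pfault being
 * enabled on the gmap. Only then is an async_pf work item queued.
 */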
2433 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2434 {
2435 hva_t hva;
2436 struct kvm_arch_async_pf arch;
2437 int rc;
2438
2439 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2440 return 0;
2441 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2442 vcpu->arch.pfault_compare)
2443 return 0;
2444 if (psw_extint_disabled(vcpu))
2445 return 0;
2446 if (kvm_s390_vcpu_has_irq(vcpu, 0))
2447 return 0;
2448 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2449 return 0;
2450 if (!vcpu->arch.gmap->pfault_enabled)
2451 return 0;
2452
2453 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2454 hva += current->thread.gmap_addr & ~PAGE_MASK;
2455 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2456 return 0;
2457
2458 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2459 return rc;
2460 }
2461
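/*
 * Work done before every SIE entry: async pfault housekeeping, host
 * rescheduling and machine-check handling, delivery of pending guest
 * interrupts, processing of vcpu requests and, if guest debugging is
 * active, shadowing of the guest PER registers.
 */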
2462 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2463 {
2464 int rc, cpuflags;
2465
2466 /*
2467 * On s390 notifications for arriving pages will be delivered directly
2468 * to the guest, but the housekeeping for completed pfaults is
2469 * handled outside the worker.
2470 */
2471 kvm_check_async_pf_completion(vcpu);
2472
2473 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2474 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2475
2476 if (need_resched())
2477 schedule();
2478
2479 if (test_cpu_flag(CIF_MCCK_PENDING))
2480 s390_handle_mcck();
2481
2482 if (!kvm_is_ucontrol(vcpu->kvm)) {
2483 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2484 if (rc)
2485 return rc;
2486 }
2487
2488 rc = kvm_s390_handle_requests(vcpu);
2489 if (rc)
2490 return rc;
2491
2492 if (guestdbg_enabled(vcpu)) {
2493 kvm_s390_backup_guest_per_regs(vcpu);
2494 kvm_s390_patch_guest_per_regs(vcpu);
2495 }
2496
2497 vcpu->arch.sie_block->icptcode = 0;
2498 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2499 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2500 trace_kvm_s390_sie_enter(vcpu, cpuflags);
2501
2502 return 0;
2503 }
2504
2505 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2506 {
2507 struct kvm_s390_pgm_info pgm_info = {
2508 .code = PGM_ADDRESSING,
2509 };
2510 u8 opcode, ilen;
2511 int rc;
2512
2513 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2514 trace_kvm_s390_sie_fault(vcpu);
2515
2516 /*
2517 * We want to inject an addressing exception, which is defined as a
2518 * suppressing or terminating exception. However, since we came here
2519 * by a DAT access exception, the PSW still points to the faulting
2520 * instruction since DAT exceptions are nullifying. So we've got
2521 * to look up the current opcode to get the length of the instruction
2522 * to be able to forward the PSW.
2523 */
2524 rc = read_guest_instr(vcpu, &opcode, 1);
2525 ilen = insn_length(opcode);
2526 if (rc < 0) {
2527 return rc;
2528 } else if (rc) {
2529 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2530 * Forward by arbitrary ilc, injection will take care of
2531 * nullification if necessary.
2532 */
2533 pgm_info = vcpu->arch.pgm;
2534 ilen = 4;
2535 }
2536 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2537 kvm_s390_forward_psw(vcpu, ilen);
2538 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2539 }
2540
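/*
 * Work done after every SIE exit: restore the guest PER registers, copy
 * back r14/r15, then turn the intercept code and exit reason into either
 * a handled-in-kernel result, an exit to userspace (-EREMOTE), an
 * asynchronous or synchronous fault-in of the faulting guest page, or an
 * addressing exception injected by vcpu_post_run_fault_in_sie().
 */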
2541 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2542 {
2543 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2544 vcpu->arch.sie_block->icptcode);
2545 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2546
2547 if (guestdbg_enabled(vcpu))
2548 kvm_s390_restore_guest_per_regs(vcpu);
2549
2550 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2551 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2552
2553 if (vcpu->arch.sie_block->icptcode > 0) {
2554 int rc = kvm_handle_sie_intercept(vcpu);
2555
2556 if (rc != -EOPNOTSUPP)
2557 return rc;
2558 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2559 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2560 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2561 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2562 return -EREMOTE;
2563 } else if (exit_reason != -EFAULT) {
2564 vcpu->stat.exit_null++;
2565 return 0;
2566 } else if (kvm_is_ucontrol(vcpu->kvm)) {
2567 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2568 vcpu->run->s390_ucontrol.trans_exc_code =
2569 current->thread.gmap_addr;
2570 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2571 return -EREMOTE;
2572 } else if (current->thread.gmap_pfault) {
2573 trace_kvm_s390_major_guest_pfault(vcpu);
2574 current->thread.gmap_pfault = 0;
2575 if (kvm_arch_setup_async_pf(vcpu))
2576 return 0;
2577 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2578 }
2579 return vcpu_post_run_fault_in_sie(vcpu);
2580 }
2581
2582 static int __vcpu_run(struct kvm_vcpu *vcpu)
2583 {
2584 int rc, exit_reason;
2585
2586 /*
2587 * We try to hold kvm->srcu during most of vcpu_run (except when
2588 * running the guest), so that memslots (and other stuff) are protected.
2589 */
2590 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2591
2592 do {
2593 rc = vcpu_pre_run(vcpu);
2594 if (rc)
2595 break;
2596
2597 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2598 /*
2599 * As PF_VCPU will be used in the fault handler, there should be no
2600 * uaccess between guest_enter and guest_exit.
2601 */
2602 local_irq_disable();
2603 __kvm_guest_enter();
2604 __disable_cpu_timer_accounting(vcpu);
2605 local_irq_enable();
2606 exit_reason = sie64a(vcpu->arch.sie_block,
2607 vcpu->run->s.regs.gprs);
2608 local_irq_disable();
2609 __enable_cpu_timer_accounting(vcpu);
2610 __kvm_guest_exit();
2611 local_irq_enable();
2612 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2613
2614 rc = vcpu_post_run(vcpu, exit_reason);
2615 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2616
2617 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2618 return rc;
2619 }
2620
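/*
 * sync_regs()/store_regs() copy the register state shared with userspace
 * via kvm_run (PSW, prefix, control registers, timers, pfault parameters
 * and friends) into the SIE block before the run loop and back out
 * afterwards.
 */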
2621 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2622 {
2623 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2624 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2625 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2626 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2627 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2628 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2629 /* some control register changes require a tlb flush */
2630 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2631 }
2632 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2633 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2634 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2635 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2636 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2637 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2638 }
2639 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2640 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2641 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2642 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2643 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2644 kvm_clear_async_pf_completion_queue(vcpu);
2645 }
2646 kvm_run->kvm_dirty_regs = 0;
2647 }
2648
2649 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2650 {
2651 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2652 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2653 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2654 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2655 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2656 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2657 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2658 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2659 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2660 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2661 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2662 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2663 }
2664
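/*
 * Main KVM_RUN entry point: honour pending debug exits, start the vcpu
 * unless userspace controls the CPU state itself, sync registers, run
 * the SIE loop and translate the result into a kvm_run exit reason.
 */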
2665 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2666 {
2667 int rc;
2668 sigset_t sigsaved;
2669
2670 if (guestdbg_exit_pending(vcpu)) {
2671 kvm_s390_prepare_debug_exit(vcpu);
2672 return 0;
2673 }
2674
2675 if (vcpu->sigset_active)
2676 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2677
2678 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2679 kvm_s390_vcpu_start(vcpu);
2680 } else if (is_vcpu_stopped(vcpu)) {
2681 pr_err_ratelimited("can't run stopped vcpu %d\n",
2682 vcpu->vcpu_id);
2683 return -EINVAL;
2684 }
2685
2686 sync_regs(vcpu, kvm_run);
2687 enable_cpu_timer_accounting(vcpu);
2688
2689 might_fault();
2690 rc = __vcpu_run(vcpu);
2691
2692 if (signal_pending(current) && !rc) {
2693 kvm_run->exit_reason = KVM_EXIT_INTR;
2694 rc = -EINTR;
2695 }
2696
2697 if (guestdbg_exit_pending(vcpu) && !rc) {
2698 kvm_s390_prepare_debug_exit(vcpu);
2699 rc = 0;
2700 }
2701
2702 if (rc == -EREMOTE) {
2703 /* userspace support is needed, kvm_run has been prepared */
2704 rc = 0;
2705 }
2706
2707 disable_cpu_timer_accounting(vcpu);
2708 store_regs(vcpu, kvm_run);
2709
2710 if (vcpu->sigset_active)
2711 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2712
2713 vcpu->stat.exit_userspace++;
2714 return rc;
2715 }
2716
2717 /*
2718 * store status at address
2719 * we have two special cases:
2720 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2721 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2722 */
2723 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2724 {
2725 unsigned char archmode = 1;
2726 freg_t fprs[NUM_FPRS];
2727 unsigned int px;
2728 u64 clkcomp, cputm;
2729 int rc;
2730
2731 px = kvm_s390_get_prefix(vcpu);
2732 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2733 if (write_guest_abs(vcpu, 163, &archmode, 1))
2734 return -EFAULT;
2735 gpa = 0;
2736 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2737 if (write_guest_real(vcpu, 163, &archmode, 1))
2738 return -EFAULT;
2739 gpa = px;
2740 } else
2741 gpa -= __LC_FPREGS_SAVE_AREA;
2742
2743 /* manually convert vector registers if necessary */
2744 if (MACHINE_HAS_VX) {
2745 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2746 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2747 fprs, 128);
2748 } else {
2749 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2750 vcpu->run->s.regs.fprs, 128);
2751 }
2752 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2753 vcpu->run->s.regs.gprs, 128);
2754 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2755 &vcpu->arch.sie_block->gpsw, 16);
2756 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2757 &px, 4);
2758 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2759 &vcpu->run->s.regs.fpc, 4);
2760 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2761 &vcpu->arch.sie_block->todpr, 4);
2762 cputm = kvm_s390_get_cpu_timer(vcpu);
2763 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2764 &cputm, 8);
2765 clkcomp = vcpu->arch.sie_block->ckc >> 8;
2766 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2767 &clkcomp, 8);
2768 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2769 &vcpu->run->s.regs.acrs, 64);
2770 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2771 &vcpu->arch.sie_block->gcr, 128);
2772 return rc ? -EFAULT : 0;
2773 }
2774
2775 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2776 {
2777 /*
2778 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2779 * copying in vcpu load/put. Let's update our copies before we save
2780 * them into the save area.
2781 */
2782 save_fpu_regs();
2783 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2784 save_access_regs(vcpu->run->s.regs.acrs);
2785
2786 return kvm_s390_store_status_unloaded(vcpu, addr);
2787 }
2788
2789 /*
2790 * store additional status at address
2791 */
2792 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2793 unsigned long gpa)
2794 {
2795 /* Only bits 0-53 are used for address formation */
2796 if (!(gpa & ~0x3ff))
2797 return 0;
2798
2799 return write_guest_abs(vcpu, gpa & ~0x3ff,
2800 (void *)&vcpu->run->s.regs.vrs, 512);
2801 }
2802
2803 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2804 {
2805 if (!test_kvm_facility(vcpu->kvm, 129))
2806 return 0;
2807
2808 /*
2809 * The guest VXRS are in the host VXRS due to the lazy
2810 * copying in vcpu load/put. We can simply call save_fpu_regs()
2811 * to save the current register state because we are in the
2812 * middle of a load/put cycle.
2813 *
2814 * Let's update our copies before we save it into the save area.
2815 */
2816 save_fpu_regs();
2817
2818 return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2819 }
2820
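/*
 * Helpers to toggle the IBS facility on a vcpu via sync requests. IBS is
 * only used as a single-running-vcpu optimization, see
 * kvm_s390_vcpu_start() and kvm_s390_vcpu_stop() below.
 */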
2821 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2822 {
2823 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2824 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2825 }
2826
2827 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2828 {
2829 unsigned int i;
2830 struct kvm_vcpu *vcpu;
2831
2832 kvm_for_each_vcpu(i, vcpu, kvm) {
2833 __disable_ibs_on_vcpu(vcpu);
2834 }
2835 }
2836
2837 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2838 {
2839 if (!sclp.has_ibs)
2840 return;
2841 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2842 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2843 }
2844
2845 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2846 {
2847 int i, online_vcpus, started_vcpus = 0;
2848
2849 if (!is_vcpu_stopped(vcpu))
2850 return;
2851
2852 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2853 /* Only one cpu at a time may enter/leave the STOPPED state. */
2854 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2855 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2856
2857 for (i = 0; i < online_vcpus; i++) {
2858 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2859 started_vcpus++;
2860 }
2861
2862 if (started_vcpus == 0) {
2863 /* we're the only active VCPU -> speed it up */
2864 __enable_ibs_on_vcpu(vcpu);
2865 } else if (started_vcpus == 1) {
2866 /*
2867 * As we are starting a second VCPU, we have to disable
2868 * the IBS facility on all VCPUs to remove potentially
2869 * outstanding ENABLE requests.
2870 */
2871 __disable_ibs_on_all_vcpus(vcpu->kvm);
2872 }
2873
2874 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2875 /*
2876 * Another VCPU might have used IBS while we were offline.
2877 * Let's play safe and flush the VCPU at startup.
2878 */
2879 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2880 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2881 return;
2882 }
2883
2884 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2885 {
2886 int i, online_vcpus, started_vcpus = 0;
2887 struct kvm_vcpu *started_vcpu = NULL;
2888
2889 if (is_vcpu_stopped(vcpu))
2890 return;
2891
2892 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2893 /* Only one cpu at a time may enter/leave the STOPPED state. */
2894 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2895 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2896
2897 /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2898 kvm_s390_clear_stop_irq(vcpu);
2899
2900 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2901 __disable_ibs_on_vcpu(vcpu);
2902
2903 for (i = 0; i < online_vcpus; i++) {
2904 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2905 started_vcpus++;
2906 started_vcpu = vcpu->kvm->vcpus[i];
2907 }
2908 }
2909
2910 if (started_vcpus == 1) {
2911 /*
2912 * As we only have one VCPU left, we want to enable the
2913 * IBS facility for that VCPU to speed it up.
2914 */
2915 __enable_ibs_on_vcpu(started_vcpu);
2916 }
2917
2918 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2919 return;
2920 }
2921
2922 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2923 struct kvm_enable_cap *cap)
2924 {
2925 int r;
2926
2927 if (cap->flags)
2928 return -EINVAL;
2929
2930 switch (cap->cap) {
2931 case KVM_CAP_S390_CSS_SUPPORT:
2932 if (!vcpu->kvm->arch.css_support) {
2933 vcpu->kvm->arch.css_support = 1;
2934 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2935 trace_kvm_s390_enable_css(vcpu->kvm);
2936 }
2937 r = 0;
2938 break;
2939 default:
2940 r = -EINVAL;
2941 break;
2942 }
2943 return r;
2944 }
2945
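/*
 * Handler for the KVM_S390_MEM_OP vcpu ioctl: read or write guest
 * logical memory through a temporary kernel buffer, optionally only
 * checking accessibility (KVM_S390_MEMOP_F_CHECK_ONLY) and optionally
 * injecting the resulting program exception into the guest
 * (KVM_S390_MEMOP_F_INJECT_EXCEPTION).
 * Userspace usage (sketch, assuming a vcpu fd, guest address and buffer):
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = len,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)buffer,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */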
2946 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2947 struct kvm_s390_mem_op *mop)
2948 {
2949 void __user *uaddr = (void __user *)mop->buf;
2950 void *tmpbuf = NULL;
2951 int r, srcu_idx;
2952 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2953 | KVM_S390_MEMOP_F_CHECK_ONLY;
2954
2955 if (mop->flags & ~supported_flags)
2956 return -EINVAL;
2957
2958 if (mop->size > MEM_OP_MAX_SIZE)
2959 return -E2BIG;
2960
2961 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2962 tmpbuf = vmalloc(mop->size);
2963 if (!tmpbuf)
2964 return -ENOMEM;
2965 }
2966
2967 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2968
2969 switch (mop->op) {
2970 case KVM_S390_MEMOP_LOGICAL_READ:
2971 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2972 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2973 mop->size, GACC_FETCH);
2974 break;
2975 }
2976 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2977 if (r == 0) {
2978 if (copy_to_user(uaddr, tmpbuf, mop->size))
2979 r = -EFAULT;
2980 }
2981 break;
2982 case KVM_S390_MEMOP_LOGICAL_WRITE:
2983 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2984 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2985 mop->size, GACC_STORE);
2986 break;
2987 }
2988 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2989 r = -EFAULT;
2990 break;
2991 }
2992 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2993 break;
2994 default:
2995 r = -EINVAL;
2996 }
2997
2998 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2999
3000 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3001 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3002
3003 vfree(tmpbuf);
3004 return r;
3005 }
3006
3007 long kvm_arch_vcpu_ioctl(struct file *filp,
3008 unsigned int ioctl, unsigned long arg)
3009 {
3010 struct kvm_vcpu *vcpu = filp->private_data;
3011 void __user *argp = (void __user *)arg;
3012 int idx;
3013 long r;
3014
3015 switch (ioctl) {
3016 case KVM_S390_IRQ: {
3017 struct kvm_s390_irq s390irq;
3018
3019 r = -EFAULT;
3020 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3021 break;
3022 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3023 break;
3024 }
3025 case KVM_S390_INTERRUPT: {
3026 struct kvm_s390_interrupt s390int;
3027 struct kvm_s390_irq s390irq;
3028
3029 r = -EFAULT;
3030 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3031 break;
3032 if (s390int_to_s390irq(&s390int, &s390irq))
3033 return -EINVAL;
3034 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3035 break;
3036 }
3037 case KVM_S390_STORE_STATUS:
3038 idx = srcu_read_lock(&vcpu->kvm->srcu);
3039 r = kvm_s390_vcpu_store_status(vcpu, arg);
3040 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3041 break;
3042 case KVM_S390_SET_INITIAL_PSW: {
3043 psw_t psw;
3044
3045 r = -EFAULT;
3046 if (copy_from_user(&psw, argp, sizeof(psw)))
3047 break;
3048 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3049 break;
3050 }
3051 case KVM_S390_INITIAL_RESET:
3052 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3053 break;
3054 case KVM_SET_ONE_REG:
3055 case KVM_GET_ONE_REG: {
3056 struct kvm_one_reg reg;
3057 r = -EFAULT;
3058 if (copy_from_user(&reg, argp, sizeof(reg)))
3059 break;
3060 if (ioctl == KVM_SET_ONE_REG)
3061 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3062 else
3063 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3064 break;
3065 }
3066 #ifdef CONFIG_KVM_S390_UCONTROL
3067 case KVM_S390_UCAS_MAP: {
3068 struct kvm_s390_ucas_mapping ucasmap;
3069
3070 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3071 r = -EFAULT;
3072 break;
3073 }
3074
3075 if (!kvm_is_ucontrol(vcpu->kvm)) {
3076 r = -EINVAL;
3077 break;
3078 }
3079
3080 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3081 ucasmap.vcpu_addr, ucasmap.length);
3082 break;
3083 }
3084 case KVM_S390_UCAS_UNMAP: {
3085 struct kvm_s390_ucas_mapping ucasmap;
3086
3087 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3088 r = -EFAULT;
3089 break;
3090 }
3091
3092 if (!kvm_is_ucontrol(vcpu->kvm)) {
3093 r = -EINVAL;
3094 break;
3095 }
3096
3097 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3098 ucasmap.length);
3099 break;
3100 }
3101 #endif
3102 case KVM_S390_VCPU_FAULT: {
3103 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3104 break;
3105 }
3106 case KVM_ENABLE_CAP:
3107 {
3108 struct kvm_enable_cap cap;
3109 r = -EFAULT;
3110 if (copy_from_user(&cap, argp, sizeof(cap)))
3111 break;
3112 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3113 break;
3114 }
3115 case KVM_S390_MEM_OP: {
3116 struct kvm_s390_mem_op mem_op;
3117
3118 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3119 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3120 else
3121 r = -EFAULT;
3122 break;
3123 }
3124 case KVM_S390_SET_IRQ_STATE: {
3125 struct kvm_s390_irq_state irq_state;
3126
3127 r = -EFAULT;
3128 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3129 break;
3130 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3131 irq_state.len == 0 ||
3132 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3133 r = -EINVAL;
3134 break;
3135 }
3136 r = kvm_s390_set_irq_state(vcpu,
3137 (void __user *) irq_state.buf,
3138 irq_state.len);
3139 break;
3140 }
3141 case KVM_S390_GET_IRQ_STATE: {
3142 struct kvm_s390_irq_state irq_state;
3143
3144 r = -EFAULT;
3145 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3146 break;
3147 if (irq_state.len == 0) {
3148 r = -EINVAL;
3149 break;
3150 }
3151 r = kvm_s390_get_irq_state(vcpu,
3152 (__u8 __user *) irq_state.buf,
3153 irq_state.len);
3154 break;
3155 }
3156 default:
3157 r = -ENOTTY;
3158 }
3159 return r;
3160 }
3161
3162 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3163 {
3164 #ifdef CONFIG_KVM_S390_UCONTROL
3165 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3166 && (kvm_is_ucontrol(vcpu->kvm))) {
3167 vmf->page = virt_to_page(vcpu->arch.sie_block);
3168 get_page(vmf->page);
3169 return 0;
3170 }
3171 #endif
3172 return VM_FAULT_SIGBUS;
3173 }
3174
3175 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3176 unsigned long npages)
3177 {
3178 return 0;
3179 }
3180
3181 /* Section: memory related */
3182 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3183 struct kvm_memory_slot *memslot,
3184 const struct kvm_userspace_memory_region *mem,
3185 enum kvm_mr_change change)
3186 {
3187 /* A few sanity checks. Memory slots have to start and end at a
3188 segment boundary (1MB). The memory in userland may be fragmented
3189 into various different vmas. It is okay to mmap() and munmap()
3190 memory in this slot at any time after this call. */
3191
3192 if (mem->userspace_addr & 0xffffful)
3193 return -EINVAL;
3194
3195 if (mem->memory_size & 0xffffful)
3196 return -EINVAL;
3197
3198 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3199 return -EINVAL;
3200
3201 return 0;
3202 }
3203
3204 void kvm_arch_commit_memory_region(struct kvm *kvm,
3205 const struct kvm_userspace_memory_region *mem,
3206 const struct kvm_memory_slot *old,
3207 const struct kvm_memory_slot *new,
3208 enum kvm_mr_change change)
3209 {
3210 int rc;
3211
3212 /* If the basics of the memslot do not change, we do not want
3213 * to update the gmap. Every update causes several unnecessary
3214 * segment translation exceptions. This is usually handled just
3215 * fine by the normal fault handler + gmap, but it will also
3216 * cause faults on the prefix page of running guest CPUs.
3217 */
3218 if (old->userspace_addr == mem->userspace_addr &&
3219 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3220 old->npages * PAGE_SIZE == mem->memory_size)
3221 return;
3222
3223 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3224 mem->guest_phys_addr, mem->memory_size);
3225 if (rc)
3226 pr_warn("failed to commit memory region\n");
3227 return;
3228 }
3229
3230 static inline unsigned long nonhyp_mask(int i)
3231 {
3232 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3233
3234 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3235 }
3236
3237 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3238 {
3239 vcpu->valid_wakeup = false;
3240 }
3241
3242 static int __init kvm_s390_init(void)
3243 {
3244 int i;
3245
3246 if (!sclp.has_sief2) {
3247 pr_info("SIE not available\n");
3248 return -ENODEV;
3249 }
3250
3251 for (i = 0; i < 16; i++)
3252 kvm_s390_fac_list_mask[i] |=
3253 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3254
3255 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3256 }
3257
3258 static void __exit kvm_s390_exit(void)
3259 {
3260 kvm_exit();
3261 }
3262
3263 module_init(kvm_s390_init);
3264 module_exit(kvm_s390_exit);
3265
3266 /*
3267 * Enable autoloading of the kvm module.
3268 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3269 * since x86 takes a different approach.
3270 */
3271 #include <linux/miscdevice.h>
3272 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3273 MODULE_ALIAS("devname:kvm");