2 * hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008, 2009
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
17 #include <linux/compiler.h>
18 #include <linux/err.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
34 #include <asm/asm-offsets.h>
35 #include <asm/lowcore.h>
37 #include <asm/pgtable.h>
40 #include <asm/switch_to.h>
43 #include <asm/cpacf.h>
44 #include <asm/timex.h>
48 #define KMSG_COMPONENT "kvm-s390"
50 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
52 #define CREATE_TRACE_POINTS
54 #include "trace-s390.h"
56 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
58 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
59 (KVM_MAX_VCPUS + LOCAL_IRQS))
61 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
63 struct kvm_stats_debugfs_item debugfs_entries
[] = {
64 { "userspace_handled", VCPU_STAT(exit_userspace
) },
65 { "exit_null", VCPU_STAT(exit_null
) },
66 { "exit_validity", VCPU_STAT(exit_validity
) },
67 { "exit_stop_request", VCPU_STAT(exit_stop_request
) },
68 { "exit_external_request", VCPU_STAT(exit_external_request
) },
69 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt
) },
70 { "exit_instruction", VCPU_STAT(exit_instruction
) },
71 { "exit_pei", VCPU_STAT(exit_pei
) },
72 { "exit_program_interruption", VCPU_STAT(exit_program_interruption
) },
73 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program
) },
74 { "exit_operation_exception", VCPU_STAT(exit_operation_exception
) },
75 { "halt_successful_poll", VCPU_STAT(halt_successful_poll
) },
76 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll
) },
77 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid
) },
78 { "halt_wakeup", VCPU_STAT(halt_wakeup
) },
79 { "instruction_lctlg", VCPU_STAT(instruction_lctlg
) },
80 { "instruction_lctl", VCPU_STAT(instruction_lctl
) },
81 { "instruction_stctl", VCPU_STAT(instruction_stctl
) },
82 { "instruction_stctg", VCPU_STAT(instruction_stctg
) },
83 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal
) },
84 { "deliver_external_call", VCPU_STAT(deliver_external_call
) },
85 { "deliver_service_signal", VCPU_STAT(deliver_service_signal
) },
86 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt
) },
87 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal
) },
88 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal
) },
89 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal
) },
90 { "deliver_program_interruption", VCPU_STAT(deliver_program_int
) },
91 { "exit_wait_state", VCPU_STAT(exit_wait_state
) },
92 { "instruction_pfmf", VCPU_STAT(instruction_pfmf
) },
93 { "instruction_stidp", VCPU_STAT(instruction_stidp
) },
94 { "instruction_spx", VCPU_STAT(instruction_spx
) },
95 { "instruction_stpx", VCPU_STAT(instruction_stpx
) },
96 { "instruction_stap", VCPU_STAT(instruction_stap
) },
97 { "instruction_storage_key", VCPU_STAT(instruction_storage_key
) },
98 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock
) },
99 { "instruction_stsch", VCPU_STAT(instruction_stsch
) },
100 { "instruction_chsc", VCPU_STAT(instruction_chsc
) },
101 { "instruction_essa", VCPU_STAT(instruction_essa
) },
102 { "instruction_stsi", VCPU_STAT(instruction_stsi
) },
103 { "instruction_stfl", VCPU_STAT(instruction_stfl
) },
104 { "instruction_tprot", VCPU_STAT(instruction_tprot
) },
105 { "instruction_sthyi", VCPU_STAT(instruction_sthyi
) },
106 { "instruction_sie", VCPU_STAT(instruction_sie
) },
107 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense
) },
108 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running
) },
109 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call
) },
110 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency
) },
111 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency
) },
112 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start
) },
113 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop
) },
114 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status
) },
115 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status
) },
116 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status
) },
117 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch
) },
118 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix
) },
119 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart
) },
120 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset
) },
121 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset
) },
122 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown
) },
123 { "diagnose_10", VCPU_STAT(diagnose_10
) },
124 { "diagnose_44", VCPU_STAT(diagnose_44
) },
125 { "diagnose_9c", VCPU_STAT(diagnose_9c
) },
126 { "diagnose_258", VCPU_STAT(diagnose_258
) },
127 { "diagnose_308", VCPU_STAT(diagnose_308
) },
128 { "diagnose_500", VCPU_STAT(diagnose_500
) },
132 /* allow nested virtualization in KVM (if enabled by user space) */
134 module_param(nested
, int, S_IRUGO
);
135 MODULE_PARM_DESC(nested
, "Nested virtualization support");
137 /* upper facilities limit for kvm */
138 unsigned long kvm_s390_fac_list_mask
[16] = { FACILITIES_KVM
};
140 unsigned long kvm_s390_fac_list_mask_size(void)
142 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask
) > S390_ARCH_FAC_MASK_SIZE_U64
);
143 return ARRAY_SIZE(kvm_s390_fac_list_mask
);
146 /* available cpu features supported by kvm */
147 static DECLARE_BITMAP(kvm_s390_available_cpu_feat
, KVM_S390_VM_CPU_FEAT_NR_BITS
);
148 /* available subfunctions indicated via query / "test bit" */
149 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc
;
151 static struct gmap_notifier gmap_notifier
;
152 static struct gmap_notifier vsie_gmap_notifier
;
153 debug_info_t
*kvm_s390_dbf
;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}
/* forward declaration; presumably defined further down in this file — verify */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
166 * This callback is executed during stop_machine(). All CPUs are therefore
167 * temporarily stopped. In order not to change guest behavior, we have to
168 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
169 * so a CPU won't be stopped while calculating with the epoch.
171 static int kvm_clock_sync(struct notifier_block
*notifier
, unsigned long val
,
175 struct kvm_vcpu
*vcpu
;
177 unsigned long long *delta
= v
;
179 list_for_each_entry(kvm
, &vm_list
, vm_list
) {
180 kvm
->arch
.epoch
-= *delta
;
181 kvm_for_each_vcpu(i
, vcpu
, kvm
) {
182 vcpu
->arch
.sie_block
->epoch
-= *delta
;
183 if (vcpu
->arch
.cputm_enabled
)
184 vcpu
->arch
.cputm_start
+= *delta
;
185 if (vcpu
->arch
.vsie_block
)
186 vcpu
->arch
.vsie_block
->epoch
-= *delta
;
192 static struct notifier_block kvm_clock_notifier
= {
193 .notifier_call
= kvm_clock_sync
,
196 int kvm_arch_hardware_setup(void)
198 gmap_notifier
.notifier_call
= kvm_gmap_notifier
;
199 gmap_register_pte_notifier(&gmap_notifier
);
200 vsie_gmap_notifier
.notifier_call
= kvm_s390_vsie_gmap_notifier
;
201 gmap_register_pte_notifier(&vsie_gmap_notifier
);
202 atomic_notifier_chain_register(&s390_epoch_delta_notifier
,
203 &kvm_clock_notifier
);
207 void kvm_arch_hardware_unsetup(void)
209 gmap_unregister_pte_notifier(&gmap_notifier
);
210 gmap_unregister_pte_notifier(&vsie_gmap_notifier
);
211 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier
,
212 &kvm_clock_notifier
);
215 static void allow_cpu_feat(unsigned long nr
)
217 set_bit_inv(nr
, kvm_s390_available_cpu_feat
);
220 static inline int plo_test_bit(unsigned char nr
)
222 register unsigned long r0
asm("0") = (unsigned long) nr
| 0x100;
226 /* Parameter registers are ignored for "test bit" */
236 static void kvm_s390_cpu_feat_init(void)
240 for (i
= 0; i
< 256; ++i
) {
242 kvm_s390_available_subfunc
.plo
[i
>> 3] |= 0x80 >> (i
& 7);
245 if (test_facility(28)) /* TOD-clock steering */
246 ptff(kvm_s390_available_subfunc
.ptff
,
247 sizeof(kvm_s390_available_subfunc
.ptff
),
250 if (test_facility(17)) { /* MSA */
251 __cpacf_query(CPACF_KMAC
, (cpacf_mask_t
*)
252 kvm_s390_available_subfunc
.kmac
);
253 __cpacf_query(CPACF_KMC
, (cpacf_mask_t
*)
254 kvm_s390_available_subfunc
.kmc
);
255 __cpacf_query(CPACF_KM
, (cpacf_mask_t
*)
256 kvm_s390_available_subfunc
.km
);
257 __cpacf_query(CPACF_KIMD
, (cpacf_mask_t
*)
258 kvm_s390_available_subfunc
.kimd
);
259 __cpacf_query(CPACF_KLMD
, (cpacf_mask_t
*)
260 kvm_s390_available_subfunc
.klmd
);
262 if (test_facility(76)) /* MSA3 */
263 __cpacf_query(CPACF_PCKMO
, (cpacf_mask_t
*)
264 kvm_s390_available_subfunc
.pckmo
);
265 if (test_facility(77)) { /* MSA4 */
266 __cpacf_query(CPACF_KMCTR
, (cpacf_mask_t
*)
267 kvm_s390_available_subfunc
.kmctr
);
268 __cpacf_query(CPACF_KMF
, (cpacf_mask_t
*)
269 kvm_s390_available_subfunc
.kmf
);
270 __cpacf_query(CPACF_KMO
, (cpacf_mask_t
*)
271 kvm_s390_available_subfunc
.kmo
);
272 __cpacf_query(CPACF_PCC
, (cpacf_mask_t
*)
273 kvm_s390_available_subfunc
.pcc
);
275 if (test_facility(57)) /* MSA5 */
276 __cpacf_query(CPACF_PPNO
, (cpacf_mask_t
*)
277 kvm_s390_available_subfunc
.ppno
);
279 if (MACHINE_HAS_ESOP
)
280 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP
);
282 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
283 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
285 if (!sclp
.has_sief2
|| !MACHINE_HAS_ESOP
|| !sclp
.has_64bscao
||
286 !test_facility(3) || !nested
)
288 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2
);
289 if (sclp
.has_64bscao
)
290 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO
);
292 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF
);
294 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE
);
296 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS
);
298 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB
);
300 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI
);
302 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS
);
304 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS
);
306 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
307 * all skey handling functions read/set the skey from the PGSTE
308 * instead of the real storage key.
310 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
311 * pages being detected as preserved although they are resident.
313 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
314 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
316 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
317 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
318 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
320 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
321 * cannot easily shadow the SCA because of the ipte lock.
325 int kvm_arch_init(void *opaque
)
327 kvm_s390_dbf
= debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
331 if (debug_register_view(kvm_s390_dbf
, &debug_sprintf_view
)) {
332 debug_unregister(kvm_s390_dbf
);
336 kvm_s390_cpu_feat_init();
338 /* Register floating interrupt controller interface. */
339 return kvm_register_device_ops(&kvm_flic_ops
, KVM_DEV_TYPE_FLIC
);
342 void kvm_arch_exit(void)
344 debug_unregister(kvm_s390_dbf
);
347 /* Section: device related */
348 long kvm_arch_dev_ioctl(struct file
*filp
,
349 unsigned int ioctl
, unsigned long arg
)
351 if (ioctl
== KVM_S390_ENABLE_SIE
)
352 return s390_enable_sie();
356 int kvm_vm_ioctl_check_extension(struct kvm
*kvm
, long ext
)
361 case KVM_CAP_S390_PSW
:
362 case KVM_CAP_S390_GMAP
:
363 case KVM_CAP_SYNC_MMU
:
364 #ifdef CONFIG_KVM_S390_UCONTROL
365 case KVM_CAP_S390_UCONTROL
:
367 case KVM_CAP_ASYNC_PF
:
368 case KVM_CAP_SYNC_REGS
:
369 case KVM_CAP_ONE_REG
:
370 case KVM_CAP_ENABLE_CAP
:
371 case KVM_CAP_S390_CSS_SUPPORT
:
372 case KVM_CAP_IOEVENTFD
:
373 case KVM_CAP_DEVICE_CTRL
:
374 case KVM_CAP_ENABLE_CAP_VM
:
375 case KVM_CAP_S390_IRQCHIP
:
376 case KVM_CAP_VM_ATTRIBUTES
:
377 case KVM_CAP_MP_STATE
:
378 case KVM_CAP_IMMEDIATE_EXIT
:
379 case KVM_CAP_S390_INJECT_IRQ
:
380 case KVM_CAP_S390_USER_SIGP
:
381 case KVM_CAP_S390_USER_STSI
:
382 case KVM_CAP_S390_SKEYS
:
383 case KVM_CAP_S390_IRQ_STATE
:
384 case KVM_CAP_S390_USER_INSTR0
:
385 case KVM_CAP_S390_AIS
:
388 case KVM_CAP_S390_MEM_OP
:
391 case KVM_CAP_NR_VCPUS
:
392 case KVM_CAP_MAX_VCPUS
:
393 r
= KVM_S390_BSCA_CPU_SLOTS
;
394 if (!kvm_s390_use_sca_entries())
396 else if (sclp
.has_esca
&& sclp
.has_64bscao
)
397 r
= KVM_S390_ESCA_CPU_SLOTS
;
399 case KVM_CAP_NR_MEMSLOTS
:
400 r
= KVM_USER_MEM_SLOTS
;
402 case KVM_CAP_S390_COW
:
403 r
= MACHINE_HAS_ESOP
;
405 case KVM_CAP_S390_VECTOR_REGISTERS
:
408 case KVM_CAP_S390_RI
:
409 r
= test_facility(64);
411 case KVM_CAP_S390_GS
:
412 r
= test_facility(133);
420 static void kvm_s390_sync_dirty_log(struct kvm
*kvm
,
421 struct kvm_memory_slot
*memslot
)
423 gfn_t cur_gfn
, last_gfn
;
424 unsigned long address
;
425 struct gmap
*gmap
= kvm
->arch
.gmap
;
427 /* Loop over all guest pages */
428 last_gfn
= memslot
->base_gfn
+ memslot
->npages
;
429 for (cur_gfn
= memslot
->base_gfn
; cur_gfn
<= last_gfn
; cur_gfn
++) {
430 address
= gfn_to_hva_memslot(memslot
, cur_gfn
);
432 if (test_and_clear_guest_dirty(gmap
->mm
, address
))
433 mark_page_dirty(kvm
, cur_gfn
);
434 if (fatal_signal_pending(current
))
440 /* Section: vm related */
441 static void sca_del_vcpu(struct kvm_vcpu
*vcpu
);
444 * Get (and clear) the dirty memory log for a memory slot.
446 int kvm_vm_ioctl_get_dirty_log(struct kvm
*kvm
,
447 struct kvm_dirty_log
*log
)
451 struct kvm_memslots
*slots
;
452 struct kvm_memory_slot
*memslot
;
455 if (kvm_is_ucontrol(kvm
))
458 mutex_lock(&kvm
->slots_lock
);
461 if (log
->slot
>= KVM_USER_MEM_SLOTS
)
464 slots
= kvm_memslots(kvm
);
465 memslot
= id_to_memslot(slots
, log
->slot
);
467 if (!memslot
->dirty_bitmap
)
470 kvm_s390_sync_dirty_log(kvm
, memslot
);
471 r
= kvm_get_dirty_log(kvm
, log
, &is_dirty
);
475 /* Clear the dirty log */
477 n
= kvm_dirty_bitmap_bytes(memslot
);
478 memset(memslot
->dirty_bitmap
, 0, n
);
482 mutex_unlock(&kvm
->slots_lock
);
486 static void icpt_operexc_on_all_vcpus(struct kvm
*kvm
)
489 struct kvm_vcpu
*vcpu
;
491 kvm_for_each_vcpu(i
, vcpu
, kvm
) {
492 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC
, vcpu
);
496 static int kvm_vm_ioctl_enable_cap(struct kvm
*kvm
, struct kvm_enable_cap
*cap
)
504 case KVM_CAP_S390_IRQCHIP
:
505 VM_EVENT(kvm
, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
506 kvm
->arch
.use_irqchip
= 1;
509 case KVM_CAP_S390_USER_SIGP
:
510 VM_EVENT(kvm
, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
511 kvm
->arch
.user_sigp
= 1;
514 case KVM_CAP_S390_VECTOR_REGISTERS
:
515 mutex_lock(&kvm
->lock
);
516 if (kvm
->created_vcpus
) {
518 } else if (MACHINE_HAS_VX
) {
519 set_kvm_facility(kvm
->arch
.model
.fac_mask
, 129);
520 set_kvm_facility(kvm
->arch
.model
.fac_list
, 129);
521 if (test_facility(134)) {
522 set_kvm_facility(kvm
->arch
.model
.fac_mask
, 134);
523 set_kvm_facility(kvm
->arch
.model
.fac_list
, 134);
525 if (test_facility(135)) {
526 set_kvm_facility(kvm
->arch
.model
.fac_mask
, 135);
527 set_kvm_facility(kvm
->arch
.model
.fac_list
, 135);
532 mutex_unlock(&kvm
->lock
);
533 VM_EVENT(kvm
, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
534 r
? "(not available)" : "(success)");
536 case KVM_CAP_S390_RI
:
538 mutex_lock(&kvm
->lock
);
539 if (kvm
->created_vcpus
) {
541 } else if (test_facility(64)) {
542 set_kvm_facility(kvm
->arch
.model
.fac_mask
, 64);
543 set_kvm_facility(kvm
->arch
.model
.fac_list
, 64);
546 mutex_unlock(&kvm
->lock
);
547 VM_EVENT(kvm
, 3, "ENABLE: CAP_S390_RI %s",
548 r
? "(not available)" : "(success)");
550 case KVM_CAP_S390_AIS
:
551 mutex_lock(&kvm
->lock
);
552 if (kvm
->created_vcpus
) {
555 set_kvm_facility(kvm
->arch
.model
.fac_mask
, 72);
556 set_kvm_facility(kvm
->arch
.model
.fac_list
, 72);
557 kvm
->arch
.float_int
.ais_enabled
= 1;
560 mutex_unlock(&kvm
->lock
);
561 VM_EVENT(kvm
, 3, "ENABLE: AIS %s",
562 r
? "(not available)" : "(success)");
564 case KVM_CAP_S390_GS
:
566 mutex_lock(&kvm
->lock
);
567 if (atomic_read(&kvm
->online_vcpus
)) {
569 } else if (test_facility(133)) {
570 set_kvm_facility(kvm
->arch
.model
.fac_mask
, 133);
571 set_kvm_facility(kvm
->arch
.model
.fac_list
, 133);
574 mutex_unlock(&kvm
->lock
);
575 VM_EVENT(kvm
, 3, "ENABLE: CAP_S390_GS %s",
576 r
? "(not available)" : "(success)");
578 case KVM_CAP_S390_USER_STSI
:
579 VM_EVENT(kvm
, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
580 kvm
->arch
.user_stsi
= 1;
583 case KVM_CAP_S390_USER_INSTR0
:
584 VM_EVENT(kvm
, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
585 kvm
->arch
.user_instr0
= 1;
586 icpt_operexc_on_all_vcpus(kvm
);
596 static int kvm_s390_get_mem_control(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
600 switch (attr
->attr
) {
601 case KVM_S390_VM_MEM_LIMIT_SIZE
:
603 VM_EVENT(kvm
, 3, "QUERY: max guest memory: %lu bytes",
604 kvm
->arch
.mem_limit
);
605 if (put_user(kvm
->arch
.mem_limit
, (u64 __user
*)attr
->addr
))
615 static int kvm_s390_set_mem_control(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
619 switch (attr
->attr
) {
620 case KVM_S390_VM_MEM_ENABLE_CMMA
:
626 VM_EVENT(kvm
, 3, "%s", "ENABLE: CMMA support");
627 mutex_lock(&kvm
->lock
);
628 if (!kvm
->created_vcpus
) {
629 kvm
->arch
.use_cmma
= 1;
632 mutex_unlock(&kvm
->lock
);
634 case KVM_S390_VM_MEM_CLR_CMMA
:
639 if (!kvm
->arch
.use_cmma
)
642 VM_EVENT(kvm
, 3, "%s", "RESET: CMMA states");
643 mutex_lock(&kvm
->lock
);
644 idx
= srcu_read_lock(&kvm
->srcu
);
645 s390_reset_cmma(kvm
->arch
.gmap
->mm
);
646 srcu_read_unlock(&kvm
->srcu
, idx
);
647 mutex_unlock(&kvm
->lock
);
650 case KVM_S390_VM_MEM_LIMIT_SIZE
: {
651 unsigned long new_limit
;
653 if (kvm_is_ucontrol(kvm
))
656 if (get_user(new_limit
, (u64 __user
*)attr
->addr
))
659 if (kvm
->arch
.mem_limit
!= KVM_S390_NO_MEM_LIMIT
&&
660 new_limit
> kvm
->arch
.mem_limit
)
666 /* gmap_create takes last usable address */
667 if (new_limit
!= KVM_S390_NO_MEM_LIMIT
)
671 mutex_lock(&kvm
->lock
);
672 if (!kvm
->created_vcpus
) {
673 /* gmap_create will round the limit up */
674 struct gmap
*new = gmap_create(current
->mm
, new_limit
);
679 gmap_remove(kvm
->arch
.gmap
);
681 kvm
->arch
.gmap
= new;
685 mutex_unlock(&kvm
->lock
);
686 VM_EVENT(kvm
, 3, "SET: max guest address: %lu", new_limit
);
687 VM_EVENT(kvm
, 3, "New guest asce: 0x%pK",
688 (void *) kvm
->arch
.gmap
->asce
);
698 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu
*vcpu
);
700 static int kvm_s390_vm_set_crypto(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
702 struct kvm_vcpu
*vcpu
;
705 if (!test_kvm_facility(kvm
, 76))
708 mutex_lock(&kvm
->lock
);
709 switch (attr
->attr
) {
710 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW
:
712 kvm
->arch
.crypto
.crycb
->aes_wrapping_key_mask
,
713 sizeof(kvm
->arch
.crypto
.crycb
->aes_wrapping_key_mask
));
714 kvm
->arch
.crypto
.aes_kw
= 1;
715 VM_EVENT(kvm
, 3, "%s", "ENABLE: AES keywrapping support");
717 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW
:
719 kvm
->arch
.crypto
.crycb
->dea_wrapping_key_mask
,
720 sizeof(kvm
->arch
.crypto
.crycb
->dea_wrapping_key_mask
));
721 kvm
->arch
.crypto
.dea_kw
= 1;
722 VM_EVENT(kvm
, 3, "%s", "ENABLE: DEA keywrapping support");
724 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW
:
725 kvm
->arch
.crypto
.aes_kw
= 0;
726 memset(kvm
->arch
.crypto
.crycb
->aes_wrapping_key_mask
, 0,
727 sizeof(kvm
->arch
.crypto
.crycb
->aes_wrapping_key_mask
));
728 VM_EVENT(kvm
, 3, "%s", "DISABLE: AES keywrapping support");
730 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW
:
731 kvm
->arch
.crypto
.dea_kw
= 0;
732 memset(kvm
->arch
.crypto
.crycb
->dea_wrapping_key_mask
, 0,
733 sizeof(kvm
->arch
.crypto
.crycb
->dea_wrapping_key_mask
));
734 VM_EVENT(kvm
, 3, "%s", "DISABLE: DEA keywrapping support");
737 mutex_unlock(&kvm
->lock
);
741 kvm_for_each_vcpu(i
, vcpu
, kvm
) {
742 kvm_s390_vcpu_crypto_setup(vcpu
);
745 mutex_unlock(&kvm
->lock
);
749 static int kvm_s390_set_tod_high(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
753 if (copy_from_user(>od_high
, (void __user
*)attr
->addr
,
759 VM_EVENT(kvm
, 3, "SET: TOD extension: 0x%x", gtod_high
);
764 static int kvm_s390_set_tod_low(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
768 if (copy_from_user(>od
, (void __user
*)attr
->addr
, sizeof(gtod
)))
771 kvm_s390_set_tod_clock(kvm
, gtod
);
772 VM_EVENT(kvm
, 3, "SET: TOD base: 0x%llx", gtod
);
776 static int kvm_s390_set_tod(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
783 switch (attr
->attr
) {
784 case KVM_S390_VM_TOD_HIGH
:
785 ret
= kvm_s390_set_tod_high(kvm
, attr
);
787 case KVM_S390_VM_TOD_LOW
:
788 ret
= kvm_s390_set_tod_low(kvm
, attr
);
797 static int kvm_s390_get_tod_high(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
801 if (copy_to_user((void __user
*)attr
->addr
, >od_high
,
804 VM_EVENT(kvm
, 3, "QUERY: TOD extension: 0x%x", gtod_high
);
809 static int kvm_s390_get_tod_low(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
813 gtod
= kvm_s390_get_tod_clock_fast(kvm
);
814 if (copy_to_user((void __user
*)attr
->addr
, >od
, sizeof(gtod
)))
816 VM_EVENT(kvm
, 3, "QUERY: TOD base: 0x%llx", gtod
);
821 static int kvm_s390_get_tod(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
828 switch (attr
->attr
) {
829 case KVM_S390_VM_TOD_HIGH
:
830 ret
= kvm_s390_get_tod_high(kvm
, attr
);
832 case KVM_S390_VM_TOD_LOW
:
833 ret
= kvm_s390_get_tod_low(kvm
, attr
);
842 static int kvm_s390_set_processor(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
844 struct kvm_s390_vm_cpu_processor
*proc
;
845 u16 lowest_ibc
, unblocked_ibc
;
848 mutex_lock(&kvm
->lock
);
849 if (kvm
->created_vcpus
) {
853 proc
= kzalloc(sizeof(*proc
), GFP_KERNEL
);
858 if (!copy_from_user(proc
, (void __user
*)attr
->addr
,
860 kvm
->arch
.model
.cpuid
= proc
->cpuid
;
861 lowest_ibc
= sclp
.ibc
>> 16 & 0xfff;
862 unblocked_ibc
= sclp
.ibc
& 0xfff;
863 if (lowest_ibc
&& proc
->ibc
) {
864 if (proc
->ibc
> unblocked_ibc
)
865 kvm
->arch
.model
.ibc
= unblocked_ibc
;
866 else if (proc
->ibc
< lowest_ibc
)
867 kvm
->arch
.model
.ibc
= lowest_ibc
;
869 kvm
->arch
.model
.ibc
= proc
->ibc
;
871 memcpy(kvm
->arch
.model
.fac_list
, proc
->fac_list
,
872 S390_ARCH_FAC_LIST_SIZE_BYTE
);
873 VM_EVENT(kvm
, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
875 kvm
->arch
.model
.cpuid
);
876 VM_EVENT(kvm
, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
877 kvm
->arch
.model
.fac_list
[0],
878 kvm
->arch
.model
.fac_list
[1],
879 kvm
->arch
.model
.fac_list
[2]);
884 mutex_unlock(&kvm
->lock
);
888 static int kvm_s390_set_processor_feat(struct kvm
*kvm
,
889 struct kvm_device_attr
*attr
)
891 struct kvm_s390_vm_cpu_feat data
;
894 if (copy_from_user(&data
, (void __user
*)attr
->addr
, sizeof(data
)))
896 if (!bitmap_subset((unsigned long *) data
.feat
,
897 kvm_s390_available_cpu_feat
,
898 KVM_S390_VM_CPU_FEAT_NR_BITS
))
901 mutex_lock(&kvm
->lock
);
902 if (!atomic_read(&kvm
->online_vcpus
)) {
903 bitmap_copy(kvm
->arch
.cpu_feat
, (unsigned long *) data
.feat
,
904 KVM_S390_VM_CPU_FEAT_NR_BITS
);
907 mutex_unlock(&kvm
->lock
);
911 static int kvm_s390_set_processor_subfunc(struct kvm
*kvm
,
912 struct kvm_device_attr
*attr
)
915 * Once supported by kernel + hw, we have to store the subfunctions
916 * in kvm->arch and remember that user space configured them.
921 static int kvm_s390_set_cpu_model(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
925 switch (attr
->attr
) {
926 case KVM_S390_VM_CPU_PROCESSOR
:
927 ret
= kvm_s390_set_processor(kvm
, attr
);
929 case KVM_S390_VM_CPU_PROCESSOR_FEAT
:
930 ret
= kvm_s390_set_processor_feat(kvm
, attr
);
932 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC
:
933 ret
= kvm_s390_set_processor_subfunc(kvm
, attr
);
939 static int kvm_s390_get_processor(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
941 struct kvm_s390_vm_cpu_processor
*proc
;
944 proc
= kzalloc(sizeof(*proc
), GFP_KERNEL
);
949 proc
->cpuid
= kvm
->arch
.model
.cpuid
;
950 proc
->ibc
= kvm
->arch
.model
.ibc
;
951 memcpy(&proc
->fac_list
, kvm
->arch
.model
.fac_list
,
952 S390_ARCH_FAC_LIST_SIZE_BYTE
);
953 VM_EVENT(kvm
, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
955 kvm
->arch
.model
.cpuid
);
956 VM_EVENT(kvm
, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
957 kvm
->arch
.model
.fac_list
[0],
958 kvm
->arch
.model
.fac_list
[1],
959 kvm
->arch
.model
.fac_list
[2]);
960 if (copy_to_user((void __user
*)attr
->addr
, proc
, sizeof(*proc
)))
967 static int kvm_s390_get_machine(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
969 struct kvm_s390_vm_cpu_machine
*mach
;
972 mach
= kzalloc(sizeof(*mach
), GFP_KERNEL
);
977 get_cpu_id((struct cpuid
*) &mach
->cpuid
);
978 mach
->ibc
= sclp
.ibc
;
979 memcpy(&mach
->fac_mask
, kvm
->arch
.model
.fac_mask
,
980 S390_ARCH_FAC_LIST_SIZE_BYTE
);
981 memcpy((unsigned long *)&mach
->fac_list
, S390_lowcore
.stfle_fac_list
,
982 sizeof(S390_lowcore
.stfle_fac_list
));
983 VM_EVENT(kvm
, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
985 kvm
->arch
.model
.cpuid
);
986 VM_EVENT(kvm
, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
990 VM_EVENT(kvm
, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
994 if (copy_to_user((void __user
*)attr
->addr
, mach
, sizeof(*mach
)))
1001 static int kvm_s390_get_processor_feat(struct kvm
*kvm
,
1002 struct kvm_device_attr
*attr
)
1004 struct kvm_s390_vm_cpu_feat data
;
1006 bitmap_copy((unsigned long *) data
.feat
, kvm
->arch
.cpu_feat
,
1007 KVM_S390_VM_CPU_FEAT_NR_BITS
);
1008 if (copy_to_user((void __user
*)attr
->addr
, &data
, sizeof(data
)))
1013 static int kvm_s390_get_machine_feat(struct kvm
*kvm
,
1014 struct kvm_device_attr
*attr
)
1016 struct kvm_s390_vm_cpu_feat data
;
1018 bitmap_copy((unsigned long *) data
.feat
,
1019 kvm_s390_available_cpu_feat
,
1020 KVM_S390_VM_CPU_FEAT_NR_BITS
);
1021 if (copy_to_user((void __user
*)attr
->addr
, &data
, sizeof(data
)))
1026 static int kvm_s390_get_processor_subfunc(struct kvm
*kvm
,
1027 struct kvm_device_attr
*attr
)
1030 * Once we can actually configure subfunctions (kernel + hw support),
1031 * we have to check if they were already set by user space, if so copy
1032 * them from kvm->arch.
1037 static int kvm_s390_get_machine_subfunc(struct kvm
*kvm
,
1038 struct kvm_device_attr
*attr
)
1040 if (copy_to_user((void __user
*)attr
->addr
, &kvm_s390_available_subfunc
,
1041 sizeof(struct kvm_s390_vm_cpu_subfunc
)))
1045 static int kvm_s390_get_cpu_model(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
1049 switch (attr
->attr
) {
1050 case KVM_S390_VM_CPU_PROCESSOR
:
1051 ret
= kvm_s390_get_processor(kvm
, attr
);
1053 case KVM_S390_VM_CPU_MACHINE
:
1054 ret
= kvm_s390_get_machine(kvm
, attr
);
1056 case KVM_S390_VM_CPU_PROCESSOR_FEAT
:
1057 ret
= kvm_s390_get_processor_feat(kvm
, attr
);
1059 case KVM_S390_VM_CPU_MACHINE_FEAT
:
1060 ret
= kvm_s390_get_machine_feat(kvm
, attr
);
1062 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC
:
1063 ret
= kvm_s390_get_processor_subfunc(kvm
, attr
);
1065 case KVM_S390_VM_CPU_MACHINE_SUBFUNC
:
1066 ret
= kvm_s390_get_machine_subfunc(kvm
, attr
);
1072 static int kvm_s390_vm_set_attr(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
1076 switch (attr
->group
) {
1077 case KVM_S390_VM_MEM_CTRL
:
1078 ret
= kvm_s390_set_mem_control(kvm
, attr
);
1080 case KVM_S390_VM_TOD
:
1081 ret
= kvm_s390_set_tod(kvm
, attr
);
1083 case KVM_S390_VM_CPU_MODEL
:
1084 ret
= kvm_s390_set_cpu_model(kvm
, attr
);
1086 case KVM_S390_VM_CRYPTO
:
1087 ret
= kvm_s390_vm_set_crypto(kvm
, attr
);
1097 static int kvm_s390_vm_get_attr(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
1101 switch (attr
->group
) {
1102 case KVM_S390_VM_MEM_CTRL
:
1103 ret
= kvm_s390_get_mem_control(kvm
, attr
);
1105 case KVM_S390_VM_TOD
:
1106 ret
= kvm_s390_get_tod(kvm
, attr
);
1108 case KVM_S390_VM_CPU_MODEL
:
1109 ret
= kvm_s390_get_cpu_model(kvm
, attr
);
1119 static int kvm_s390_vm_has_attr(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
1123 switch (attr
->group
) {
1124 case KVM_S390_VM_MEM_CTRL
:
1125 switch (attr
->attr
) {
1126 case KVM_S390_VM_MEM_ENABLE_CMMA
:
1127 case KVM_S390_VM_MEM_CLR_CMMA
:
1128 ret
= sclp
.has_cmma
? 0 : -ENXIO
;
1130 case KVM_S390_VM_MEM_LIMIT_SIZE
:
1138 case KVM_S390_VM_TOD
:
1139 switch (attr
->attr
) {
1140 case KVM_S390_VM_TOD_LOW
:
1141 case KVM_S390_VM_TOD_HIGH
:
1149 case KVM_S390_VM_CPU_MODEL
:
1150 switch (attr
->attr
) {
1151 case KVM_S390_VM_CPU_PROCESSOR
:
1152 case KVM_S390_VM_CPU_MACHINE
:
1153 case KVM_S390_VM_CPU_PROCESSOR_FEAT
:
1154 case KVM_S390_VM_CPU_MACHINE_FEAT
:
1155 case KVM_S390_VM_CPU_MACHINE_SUBFUNC
:
1158 /* configuring subfunctions is not supported yet */
1159 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC
:
1165 case KVM_S390_VM_CRYPTO
:
1166 switch (attr
->attr
) {
1167 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW
:
1168 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW
:
1169 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW
:
1170 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW
:
1186 static long kvm_s390_get_skeys(struct kvm
*kvm
, struct kvm_s390_skeys
*args
)
1192 if (args
->flags
!= 0)
1195 /* Is this guest using storage keys? */
1196 if (!mm_use_skey(current
->mm
))
1197 return KVM_S390_GET_SKEYS_NONE
;
1199 /* Enforce sane limit on memory allocation */
1200 if (args
->count
< 1 || args
->count
> KVM_S390_SKEYS_MAX
)
1203 keys
= kmalloc_array(args
->count
, sizeof(uint8_t),
1204 GFP_KERNEL
| __GFP_NOWARN
);
1206 keys
= vmalloc(sizeof(uint8_t) * args
->count
);
1210 down_read(¤t
->mm
->mmap_sem
);
1211 for (i
= 0; i
< args
->count
; i
++) {
1212 hva
= gfn_to_hva(kvm
, args
->start_gfn
+ i
);
1213 if (kvm_is_error_hva(hva
)) {
1218 r
= get_guest_storage_key(current
->mm
, hva
, &keys
[i
]);
1222 up_read(¤t
->mm
->mmap_sem
);
1225 r
= copy_to_user((uint8_t __user
*)args
->skeydata_addr
, keys
,
1226 sizeof(uint8_t) * args
->count
);
1235 static long kvm_s390_set_skeys(struct kvm
*kvm
, struct kvm_s390_skeys
*args
)
1241 if (args
->flags
!= 0)
1244 /* Enforce sane limit on memory allocation */
1245 if (args
->count
< 1 || args
->count
> KVM_S390_SKEYS_MAX
)
1248 keys
= kmalloc_array(args
->count
, sizeof(uint8_t),
1249 GFP_KERNEL
| __GFP_NOWARN
);
1251 keys
= vmalloc(sizeof(uint8_t) * args
->count
);
1255 r
= copy_from_user(keys
, (uint8_t __user
*)args
->skeydata_addr
,
1256 sizeof(uint8_t) * args
->count
);
1262 /* Enable storage key handling for the guest */
1263 r
= s390_enable_skey();
1267 down_read(¤t
->mm
->mmap_sem
);
1268 for (i
= 0; i
< args
->count
; i
++) {
1269 hva
= gfn_to_hva(kvm
, args
->start_gfn
+ i
);
1270 if (kvm_is_error_hva(hva
)) {
1275 /* Lowest order bit is reserved */
1276 if (keys
[i
] & 0x01) {
1281 r
= set_guest_storage_key(current
->mm
, hva
, keys
[i
], 0);
1285 up_read(¤t
->mm
->mmap_sem
);
1291 long kvm_arch_vm_ioctl(struct file
*filp
,
1292 unsigned int ioctl
, unsigned long arg
)
1294 struct kvm
*kvm
= filp
->private_data
;
1295 void __user
*argp
= (void __user
*)arg
;
1296 struct kvm_device_attr attr
;
1300 case KVM_S390_INTERRUPT
: {
1301 struct kvm_s390_interrupt s390int
;
1304 if (copy_from_user(&s390int
, argp
, sizeof(s390int
)))
1306 r
= kvm_s390_inject_vm(kvm
, &s390int
);
1309 case KVM_ENABLE_CAP
: {
1310 struct kvm_enable_cap cap
;
1312 if (copy_from_user(&cap
, argp
, sizeof(cap
)))
1314 r
= kvm_vm_ioctl_enable_cap(kvm
, &cap
);
1317 case KVM_CREATE_IRQCHIP
: {
1318 struct kvm_irq_routing_entry routing
;
1321 if (kvm
->arch
.use_irqchip
) {
1322 /* Set up dummy routing. */
1323 memset(&routing
, 0, sizeof(routing
));
1324 r
= kvm_set_irq_routing(kvm
, &routing
, 0, 0);
1328 case KVM_SET_DEVICE_ATTR
: {
1330 if (copy_from_user(&attr
, (void __user
*)arg
, sizeof(attr
)))
1332 r
= kvm_s390_vm_set_attr(kvm
, &attr
);
1335 case KVM_GET_DEVICE_ATTR
: {
1337 if (copy_from_user(&attr
, (void __user
*)arg
, sizeof(attr
)))
1339 r
= kvm_s390_vm_get_attr(kvm
, &attr
);
1342 case KVM_HAS_DEVICE_ATTR
: {
1344 if (copy_from_user(&attr
, (void __user
*)arg
, sizeof(attr
)))
1346 r
= kvm_s390_vm_has_attr(kvm
, &attr
);
1349 case KVM_S390_GET_SKEYS
: {
1350 struct kvm_s390_skeys args
;
1353 if (copy_from_user(&args
, argp
,
1354 sizeof(struct kvm_s390_skeys
)))
1356 r
= kvm_s390_get_skeys(kvm
, &args
);
1359 case KVM_S390_SET_SKEYS
: {
1360 struct kvm_s390_skeys args
;
1363 if (copy_from_user(&args
, argp
,
1364 sizeof(struct kvm_s390_skeys
)))
1366 r
= kvm_s390_set_skeys(kvm
, &args
);
1376 static int kvm_s390_query_ap_config(u8
*config
)
1378 u32 fcn_code
= 0x04000000UL
;
1381 memset(config
, 0, 128);
1385 ".long 0xb2af0000\n" /* PQAP(QCI) */
1391 : "r" (fcn_code
), "r" (config
)
1392 : "cc", "0", "2", "memory"
1398 static int kvm_s390_apxa_installed(void)
1403 if (test_facility(12)) {
1404 cc
= kvm_s390_query_ap_config(config
);
1407 pr_err("PQAP(QCI) failed with cc=%d", cc
);
1409 return config
[0] & 0x40;
1415 static void kvm_s390_set_crycb_format(struct kvm
*kvm
)
1417 kvm
->arch
.crypto
.crycbd
= (__u32
)(unsigned long) kvm
->arch
.crypto
.crycb
;
1419 if (kvm_s390_apxa_installed())
1420 kvm
->arch
.crypto
.crycbd
|= CRYCB_FORMAT2
;
1422 kvm
->arch
.crypto
.crycbd
|= CRYCB_FORMAT1
;
1425 static u64
kvm_s390_get_initial_cpuid(void)
1430 cpuid
.version
= 0xff;
1431 return *((u64
*) &cpuid
);
1434 static void kvm_s390_crypto_init(struct kvm
*kvm
)
1436 if (!test_kvm_facility(kvm
, 76))
1439 kvm
->arch
.crypto
.crycb
= &kvm
->arch
.sie_page2
->crycb
;
1440 kvm_s390_set_crycb_format(kvm
);
1442 /* Enable AES/DEA protected key functions by default */
1443 kvm
->arch
.crypto
.aes_kw
= 1;
1444 kvm
->arch
.crypto
.dea_kw
= 1;
1445 get_random_bytes(kvm
->arch
.crypto
.crycb
->aes_wrapping_key_mask
,
1446 sizeof(kvm
->arch
.crypto
.crycb
->aes_wrapping_key_mask
));
1447 get_random_bytes(kvm
->arch
.crypto
.crycb
->dea_wrapping_key_mask
,
1448 sizeof(kvm
->arch
.crypto
.crycb
->dea_wrapping_key_mask
));
1451 static void sca_dispose(struct kvm
*kvm
)
1453 if (kvm
->arch
.use_esca
)
1454 free_pages_exact(kvm
->arch
.sca
, sizeof(struct esca_block
));
1456 free_page((unsigned long)(kvm
->arch
.sca
));
1457 kvm
->arch
.sca
= NULL
;
1460 int kvm_arch_init_vm(struct kvm
*kvm
, unsigned long type
)
1462 gfp_t alloc_flags
= GFP_KERNEL
;
1464 char debug_name
[16];
1465 static unsigned long sca_offset
;
1468 #ifdef CONFIG_KVM_S390_UCONTROL
1469 if (type
& ~KVM_VM_S390_UCONTROL
)
1471 if ((type
& KVM_VM_S390_UCONTROL
) && (!capable(CAP_SYS_ADMIN
)))
1478 rc
= s390_enable_sie();
1484 ratelimit_state_init(&kvm
->arch
.sthyi_limit
, 5 * HZ
, 500);
1486 kvm
->arch
.use_esca
= 0; /* start with basic SCA */
1487 if (!sclp
.has_64bscao
)
1488 alloc_flags
|= GFP_DMA
;
1489 rwlock_init(&kvm
->arch
.sca_lock
);
1490 kvm
->arch
.sca
= (struct bsca_block
*) get_zeroed_page(alloc_flags
);
1493 spin_lock(&kvm_lock
);
1495 if (sca_offset
+ sizeof(struct bsca_block
) > PAGE_SIZE
)
1497 kvm
->arch
.sca
= (struct bsca_block
*)
1498 ((char *) kvm
->arch
.sca
+ sca_offset
);
1499 spin_unlock(&kvm_lock
);
1501 sprintf(debug_name
, "kvm-%u", current
->pid
);
1503 kvm
->arch
.dbf
= debug_register(debug_name
, 32, 1, 7 * sizeof(long));
1507 kvm
->arch
.sie_page2
=
1508 (struct sie_page2
*) get_zeroed_page(GFP_KERNEL
| GFP_DMA
);
1509 if (!kvm
->arch
.sie_page2
)
1512 /* Populate the facility mask initially. */
1513 memcpy(kvm
->arch
.model
.fac_mask
, S390_lowcore
.stfle_fac_list
,
1514 sizeof(S390_lowcore
.stfle_fac_list
));
1515 for (i
= 0; i
< S390_ARCH_FAC_LIST_SIZE_U64
; i
++) {
1516 if (i
< kvm_s390_fac_list_mask_size())
1517 kvm
->arch
.model
.fac_mask
[i
] &= kvm_s390_fac_list_mask
[i
];
1519 kvm
->arch
.model
.fac_mask
[i
] = 0UL;
1522 /* Populate the facility list initially. */
1523 kvm
->arch
.model
.fac_list
= kvm
->arch
.sie_page2
->fac_list
;
1524 memcpy(kvm
->arch
.model
.fac_list
, kvm
->arch
.model
.fac_mask
,
1525 S390_ARCH_FAC_LIST_SIZE_BYTE
);
1527 set_kvm_facility(kvm
->arch
.model
.fac_mask
, 74);
1528 set_kvm_facility(kvm
->arch
.model
.fac_list
, 74);
1530 kvm
->arch
.model
.cpuid
= kvm_s390_get_initial_cpuid();
1531 kvm
->arch
.model
.ibc
= sclp
.ibc
& 0x0fff;
1533 kvm_s390_crypto_init(kvm
);
1535 mutex_init(&kvm
->arch
.float_int
.ais_lock
);
1536 kvm
->arch
.float_int
.simm
= 0;
1537 kvm
->arch
.float_int
.nimm
= 0;
1538 kvm
->arch
.float_int
.ais_enabled
= 0;
1539 spin_lock_init(&kvm
->arch
.float_int
.lock
);
1540 for (i
= 0; i
< FIRQ_LIST_COUNT
; i
++)
1541 INIT_LIST_HEAD(&kvm
->arch
.float_int
.lists
[i
]);
1542 init_waitqueue_head(&kvm
->arch
.ipte_wq
);
1543 mutex_init(&kvm
->arch
.ipte_mutex
);
1545 debug_register_view(kvm
->arch
.dbf
, &debug_sprintf_view
);
1546 VM_EVENT(kvm
, 3, "vm created with type %lu", type
);
1548 if (type
& KVM_VM_S390_UCONTROL
) {
1549 kvm
->arch
.gmap
= NULL
;
1550 kvm
->arch
.mem_limit
= KVM_S390_NO_MEM_LIMIT
;
1552 if (sclp
.hamax
== U64_MAX
)
1553 kvm
->arch
.mem_limit
= TASK_MAX_SIZE
;
1555 kvm
->arch
.mem_limit
= min_t(unsigned long, TASK_MAX_SIZE
,
1557 kvm
->arch
.gmap
= gmap_create(current
->mm
, kvm
->arch
.mem_limit
- 1);
1558 if (!kvm
->arch
.gmap
)
1560 kvm
->arch
.gmap
->private = kvm
;
1561 kvm
->arch
.gmap
->pfault_enabled
= 0;
1564 kvm
->arch
.css_support
= 0;
1565 kvm
->arch
.use_irqchip
= 0;
1566 kvm
->arch
.epoch
= 0;
1568 spin_lock_init(&kvm
->arch
.start_stop_lock
);
1569 kvm_s390_vsie_init(kvm
);
1570 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm
, current
->pid
);
1574 free_page((unsigned long)kvm
->arch
.sie_page2
);
1575 debug_unregister(kvm
->arch
.dbf
);
1577 KVM_EVENT(3, "creation of vm failed: %d", rc
);
1581 bool kvm_arch_has_vcpu_debugfs(void)
1586 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu
*vcpu
)
1591 void kvm_arch_vcpu_destroy(struct kvm_vcpu
*vcpu
)
1593 VCPU_EVENT(vcpu
, 3, "%s", "free cpu");
1594 trace_kvm_s390_destroy_vcpu(vcpu
->vcpu_id
);
1595 kvm_s390_clear_local_irqs(vcpu
);
1596 kvm_clear_async_pf_completion_queue(vcpu
);
1597 if (!kvm_is_ucontrol(vcpu
->kvm
))
1600 if (kvm_is_ucontrol(vcpu
->kvm
))
1601 gmap_remove(vcpu
->arch
.gmap
);
1603 if (vcpu
->kvm
->arch
.use_cmma
)
1604 kvm_s390_vcpu_unsetup_cmma(vcpu
);
1605 free_page((unsigned long)(vcpu
->arch
.sie_block
));
1607 kvm_vcpu_uninit(vcpu
);
1608 kmem_cache_free(kvm_vcpu_cache
, vcpu
);
1611 static void kvm_free_vcpus(struct kvm
*kvm
)
1614 struct kvm_vcpu
*vcpu
;
1616 kvm_for_each_vcpu(i
, vcpu
, kvm
)
1617 kvm_arch_vcpu_destroy(vcpu
);
1619 mutex_lock(&kvm
->lock
);
1620 for (i
= 0; i
< atomic_read(&kvm
->online_vcpus
); i
++)
1621 kvm
->vcpus
[i
] = NULL
;
1623 atomic_set(&kvm
->online_vcpus
, 0);
1624 mutex_unlock(&kvm
->lock
);
1627 void kvm_arch_destroy_vm(struct kvm
*kvm
)
1629 kvm_free_vcpus(kvm
);
1631 debug_unregister(kvm
->arch
.dbf
);
1632 free_page((unsigned long)kvm
->arch
.sie_page2
);
1633 if (!kvm_is_ucontrol(kvm
))
1634 gmap_remove(kvm
->arch
.gmap
);
1635 kvm_s390_destroy_adapters(kvm
);
1636 kvm_s390_clear_float_irqs(kvm
);
1637 kvm_s390_vsie_destroy(kvm
);
1638 KVM_EVENT(3, "vm 0x%pK destroyed", kvm
);
1641 /* Section: vcpu related */
1642 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu
*vcpu
)
1644 vcpu
->arch
.gmap
= gmap_create(current
->mm
, -1UL);
1645 if (!vcpu
->arch
.gmap
)
1647 vcpu
->arch
.gmap
->private = vcpu
->kvm
;
1652 static void sca_del_vcpu(struct kvm_vcpu
*vcpu
)
1654 if (!kvm_s390_use_sca_entries())
1656 read_lock(&vcpu
->kvm
->arch
.sca_lock
);
1657 if (vcpu
->kvm
->arch
.use_esca
) {
1658 struct esca_block
*sca
= vcpu
->kvm
->arch
.sca
;
1660 clear_bit_inv(vcpu
->vcpu_id
, (unsigned long *) sca
->mcn
);
1661 sca
->cpu
[vcpu
->vcpu_id
].sda
= 0;
1663 struct bsca_block
*sca
= vcpu
->kvm
->arch
.sca
;
1665 clear_bit_inv(vcpu
->vcpu_id
, (unsigned long *) &sca
->mcn
);
1666 sca
->cpu
[vcpu
->vcpu_id
].sda
= 0;
1668 read_unlock(&vcpu
->kvm
->arch
.sca_lock
);
1671 static void sca_add_vcpu(struct kvm_vcpu
*vcpu
)
1673 if (!kvm_s390_use_sca_entries()) {
1674 struct bsca_block
*sca
= vcpu
->kvm
->arch
.sca
;
1676 /* we still need the basic sca for the ipte control */
1677 vcpu
->arch
.sie_block
->scaoh
= (__u32
)(((__u64
)sca
) >> 32);
1678 vcpu
->arch
.sie_block
->scaol
= (__u32
)(__u64
)sca
;
1680 read_lock(&vcpu
->kvm
->arch
.sca_lock
);
1681 if (vcpu
->kvm
->arch
.use_esca
) {
1682 struct esca_block
*sca
= vcpu
->kvm
->arch
.sca
;
1684 sca
->cpu
[vcpu
->vcpu_id
].sda
= (__u64
) vcpu
->arch
.sie_block
;
1685 vcpu
->arch
.sie_block
->scaoh
= (__u32
)(((__u64
)sca
) >> 32);
1686 vcpu
->arch
.sie_block
->scaol
= (__u32
)(__u64
)sca
& ~0x3fU
;
1687 vcpu
->arch
.sie_block
->ecb2
|= ECB2_ESCA
;
1688 set_bit_inv(vcpu
->vcpu_id
, (unsigned long *) sca
->mcn
);
1690 struct bsca_block
*sca
= vcpu
->kvm
->arch
.sca
;
1692 sca
->cpu
[vcpu
->vcpu_id
].sda
= (__u64
) vcpu
->arch
.sie_block
;
1693 vcpu
->arch
.sie_block
->scaoh
= (__u32
)(((__u64
)sca
) >> 32);
1694 vcpu
->arch
.sie_block
->scaol
= (__u32
)(__u64
)sca
;
1695 set_bit_inv(vcpu
->vcpu_id
, (unsigned long *) &sca
->mcn
);
1697 read_unlock(&vcpu
->kvm
->arch
.sca_lock
);
1700 /* Basic SCA to Extended SCA data copy routines */
1701 static inline void sca_copy_entry(struct esca_entry
*d
, struct bsca_entry
*s
)
1704 d
->sigp_ctrl
.c
= s
->sigp_ctrl
.c
;
1705 d
->sigp_ctrl
.scn
= s
->sigp_ctrl
.scn
;
1708 static void sca_copy_b_to_e(struct esca_block
*d
, struct bsca_block
*s
)
1712 d
->ipte_control
= s
->ipte_control
;
1714 for (i
= 0; i
< KVM_S390_BSCA_CPU_SLOTS
; i
++)
1715 sca_copy_entry(&d
->cpu
[i
], &s
->cpu
[i
]);
1718 static int sca_switch_to_extended(struct kvm
*kvm
)
1720 struct bsca_block
*old_sca
= kvm
->arch
.sca
;
1721 struct esca_block
*new_sca
;
1722 struct kvm_vcpu
*vcpu
;
1723 unsigned int vcpu_idx
;
1726 new_sca
= alloc_pages_exact(sizeof(*new_sca
), GFP_KERNEL
|__GFP_ZERO
);
1730 scaoh
= (u32
)((u64
)(new_sca
) >> 32);
1731 scaol
= (u32
)(u64
)(new_sca
) & ~0x3fU
;
1733 kvm_s390_vcpu_block_all(kvm
);
1734 write_lock(&kvm
->arch
.sca_lock
);
1736 sca_copy_b_to_e(new_sca
, old_sca
);
1738 kvm_for_each_vcpu(vcpu_idx
, vcpu
, kvm
) {
1739 vcpu
->arch
.sie_block
->scaoh
= scaoh
;
1740 vcpu
->arch
.sie_block
->scaol
= scaol
;
1741 vcpu
->arch
.sie_block
->ecb2
|= ECB2_ESCA
;
1743 kvm
->arch
.sca
= new_sca
;
1744 kvm
->arch
.use_esca
= 1;
1746 write_unlock(&kvm
->arch
.sca_lock
);
1747 kvm_s390_vcpu_unblock_all(kvm
);
1749 free_page((unsigned long)old_sca
);
1751 VM_EVENT(kvm
, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1752 old_sca
, kvm
->arch
.sca
);
1756 static int sca_can_add_vcpu(struct kvm
*kvm
, unsigned int id
)
1760 if (!kvm_s390_use_sca_entries()) {
1761 if (id
< KVM_MAX_VCPUS
)
1765 if (id
< KVM_S390_BSCA_CPU_SLOTS
)
1767 if (!sclp
.has_esca
|| !sclp
.has_64bscao
)
1770 mutex_lock(&kvm
->lock
);
1771 rc
= kvm
->arch
.use_esca
? 0 : sca_switch_to_extended(kvm
);
1772 mutex_unlock(&kvm
->lock
);
1774 return rc
== 0 && id
< KVM_S390_ESCA_CPU_SLOTS
;
1777 int kvm_arch_vcpu_init(struct kvm_vcpu
*vcpu
)
1779 vcpu
->arch
.pfault_token
= KVM_S390_PFAULT_TOKEN_INVALID
;
1780 kvm_clear_async_pf_completion_queue(vcpu
);
1781 vcpu
->run
->kvm_valid_regs
= KVM_SYNC_PREFIX
|
1787 kvm_s390_set_prefix(vcpu
, 0);
1788 if (test_kvm_facility(vcpu
->kvm
, 64))
1789 vcpu
->run
->kvm_valid_regs
|= KVM_SYNC_RICCB
;
1790 if (test_kvm_facility(vcpu
->kvm
, 133))
1791 vcpu
->run
->kvm_valid_regs
|= KVM_SYNC_GSCB
;
1792 /* fprs can be synchronized via vrs, even if the guest has no vx. With
1793 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1796 vcpu
->run
->kvm_valid_regs
|= KVM_SYNC_VRS
;
1798 vcpu
->run
->kvm_valid_regs
|= KVM_SYNC_FPRS
;
1800 if (kvm_is_ucontrol(vcpu
->kvm
))
1801 return __kvm_ucontrol_vcpu_init(vcpu
);
1806 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1807 static void __start_cpu_timer_accounting(struct kvm_vcpu
*vcpu
)
1809 WARN_ON_ONCE(vcpu
->arch
.cputm_start
!= 0);
1810 raw_write_seqcount_begin(&vcpu
->arch
.cputm_seqcount
);
1811 vcpu
->arch
.cputm_start
= get_tod_clock_fast();
1812 raw_write_seqcount_end(&vcpu
->arch
.cputm_seqcount
);
1815 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1816 static void __stop_cpu_timer_accounting(struct kvm_vcpu
*vcpu
)
1818 WARN_ON_ONCE(vcpu
->arch
.cputm_start
== 0);
1819 raw_write_seqcount_begin(&vcpu
->arch
.cputm_seqcount
);
1820 vcpu
->arch
.sie_block
->cputm
-= get_tod_clock_fast() - vcpu
->arch
.cputm_start
;
1821 vcpu
->arch
.cputm_start
= 0;
1822 raw_write_seqcount_end(&vcpu
->arch
.cputm_seqcount
);
1825 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1826 static void __enable_cpu_timer_accounting(struct kvm_vcpu
*vcpu
)
1828 WARN_ON_ONCE(vcpu
->arch
.cputm_enabled
);
1829 vcpu
->arch
.cputm_enabled
= true;
1830 __start_cpu_timer_accounting(vcpu
);
1833 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1834 static void __disable_cpu_timer_accounting(struct kvm_vcpu
*vcpu
)
1836 WARN_ON_ONCE(!vcpu
->arch
.cputm_enabled
);
1837 __stop_cpu_timer_accounting(vcpu
);
1838 vcpu
->arch
.cputm_enabled
= false;
1841 static void enable_cpu_timer_accounting(struct kvm_vcpu
*vcpu
)
1843 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1844 __enable_cpu_timer_accounting(vcpu
);
1848 static void disable_cpu_timer_accounting(struct kvm_vcpu
*vcpu
)
1850 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1851 __disable_cpu_timer_accounting(vcpu
);
1855 /* set the cpu timer - may only be called from the VCPU thread itself */
1856 void kvm_s390_set_cpu_timer(struct kvm_vcpu
*vcpu
, __u64 cputm
)
1858 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1859 raw_write_seqcount_begin(&vcpu
->arch
.cputm_seqcount
);
1860 if (vcpu
->arch
.cputm_enabled
)
1861 vcpu
->arch
.cputm_start
= get_tod_clock_fast();
1862 vcpu
->arch
.sie_block
->cputm
= cputm
;
1863 raw_write_seqcount_end(&vcpu
->arch
.cputm_seqcount
);
1867 /* update and get the cpu timer - can also be called from other VCPU threads */
1868 __u64
kvm_s390_get_cpu_timer(struct kvm_vcpu
*vcpu
)
1873 if (unlikely(!vcpu
->arch
.cputm_enabled
))
1874 return vcpu
->arch
.sie_block
->cputm
;
1876 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1878 seq
= raw_read_seqcount(&vcpu
->arch
.cputm_seqcount
);
1880 * If the writer would ever execute a read in the critical
1881 * section, e.g. in irq context, we have a deadlock.
1883 WARN_ON_ONCE((seq
& 1) && smp_processor_id() == vcpu
->cpu
);
1884 value
= vcpu
->arch
.sie_block
->cputm
;
1885 /* if cputm_start is 0, accounting is being started/stopped */
1886 if (likely(vcpu
->arch
.cputm_start
))
1887 value
-= get_tod_clock_fast() - vcpu
->arch
.cputm_start
;
1888 } while (read_seqcount_retry(&vcpu
->arch
.cputm_seqcount
, seq
& ~1));
1893 void kvm_arch_vcpu_load(struct kvm_vcpu
*vcpu
, int cpu
)
1896 gmap_enable(vcpu
->arch
.enabled_gmap
);
1897 atomic_or(CPUSTAT_RUNNING
, &vcpu
->arch
.sie_block
->cpuflags
);
1898 if (vcpu
->arch
.cputm_enabled
&& !is_vcpu_idle(vcpu
))
1899 __start_cpu_timer_accounting(vcpu
);
1903 void kvm_arch_vcpu_put(struct kvm_vcpu
*vcpu
)
1906 if (vcpu
->arch
.cputm_enabled
&& !is_vcpu_idle(vcpu
))
1907 __stop_cpu_timer_accounting(vcpu
);
1908 atomic_andnot(CPUSTAT_RUNNING
, &vcpu
->arch
.sie_block
->cpuflags
);
1909 vcpu
->arch
.enabled_gmap
= gmap_get_enabled();
1910 gmap_disable(vcpu
->arch
.enabled_gmap
);
1914 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu
*vcpu
)
1916 /* this equals initial cpu reset in pop, but we don't switch to ESA */
1917 vcpu
->arch
.sie_block
->gpsw
.mask
= 0UL;
1918 vcpu
->arch
.sie_block
->gpsw
.addr
= 0UL;
1919 kvm_s390_set_prefix(vcpu
, 0);
1920 kvm_s390_set_cpu_timer(vcpu
, 0);
1921 vcpu
->arch
.sie_block
->ckc
= 0UL;
1922 vcpu
->arch
.sie_block
->todpr
= 0;
1923 memset(vcpu
->arch
.sie_block
->gcr
, 0, 16 * sizeof(__u64
));
1924 vcpu
->arch
.sie_block
->gcr
[0] = 0xE0UL
;
1925 vcpu
->arch
.sie_block
->gcr
[14] = 0xC2000000UL
;
1926 /* make sure the new fpc will be lazily loaded */
1928 current
->thread
.fpu
.fpc
= 0;
1929 vcpu
->arch
.sie_block
->gbea
= 1;
1930 vcpu
->arch
.sie_block
->pp
= 0;
1931 vcpu
->arch
.pfault_token
= KVM_S390_PFAULT_TOKEN_INVALID
;
1932 kvm_clear_async_pf_completion_queue(vcpu
);
1933 if (!kvm_s390_user_cpu_state_ctrl(vcpu
->kvm
))
1934 kvm_s390_vcpu_stop(vcpu
);
1935 kvm_s390_clear_local_irqs(vcpu
);
1938 void kvm_arch_vcpu_postcreate(struct kvm_vcpu
*vcpu
)
1940 mutex_lock(&vcpu
->kvm
->lock
);
1942 vcpu
->arch
.sie_block
->epoch
= vcpu
->kvm
->arch
.epoch
;
1944 mutex_unlock(&vcpu
->kvm
->lock
);
1945 if (!kvm_is_ucontrol(vcpu
->kvm
)) {
1946 vcpu
->arch
.gmap
= vcpu
->kvm
->arch
.gmap
;
1949 if (test_kvm_facility(vcpu
->kvm
, 74) || vcpu
->kvm
->arch
.user_instr0
)
1950 vcpu
->arch
.sie_block
->ictl
|= ICTL_OPEREXC
;
1951 /* make vcpu_load load the right gmap on the first trigger */
1952 vcpu
->arch
.enabled_gmap
= vcpu
->arch
.gmap
;
1955 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu
*vcpu
)
1957 if (!test_kvm_facility(vcpu
->kvm
, 76))
1960 vcpu
->arch
.sie_block
->ecb3
&= ~(ECB3_AES
| ECB3_DEA
);
1962 if (vcpu
->kvm
->arch
.crypto
.aes_kw
)
1963 vcpu
->arch
.sie_block
->ecb3
|= ECB3_AES
;
1964 if (vcpu
->kvm
->arch
.crypto
.dea_kw
)
1965 vcpu
->arch
.sie_block
->ecb3
|= ECB3_DEA
;
1967 vcpu
->arch
.sie_block
->crycbd
= vcpu
->kvm
->arch
.crypto
.crycbd
;
1970 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu
*vcpu
)
1972 free_page(vcpu
->arch
.sie_block
->cbrlo
);
1973 vcpu
->arch
.sie_block
->cbrlo
= 0;
1976 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu
*vcpu
)
1978 vcpu
->arch
.sie_block
->cbrlo
= get_zeroed_page(GFP_KERNEL
);
1979 if (!vcpu
->arch
.sie_block
->cbrlo
)
1982 vcpu
->arch
.sie_block
->ecb2
|= ECB2_CMMA
;
1983 vcpu
->arch
.sie_block
->ecb2
&= ~ECB2_PFMFI
;
1987 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu
*vcpu
)
1989 struct kvm_s390_cpu_model
*model
= &vcpu
->kvm
->arch
.model
;
1991 vcpu
->arch
.sie_block
->ibc
= model
->ibc
;
1992 if (test_kvm_facility(vcpu
->kvm
, 7))
1993 vcpu
->arch
.sie_block
->fac
= (u32
)(u64
) model
->fac_list
;
1996 int kvm_arch_vcpu_setup(struct kvm_vcpu
*vcpu
)
2000 atomic_set(&vcpu
->arch
.sie_block
->cpuflags
, CPUSTAT_ZARCH
|
2004 if (test_kvm_facility(vcpu
->kvm
, 78))
2005 atomic_or(CPUSTAT_GED2
, &vcpu
->arch
.sie_block
->cpuflags
);
2006 else if (test_kvm_facility(vcpu
->kvm
, 8))
2007 atomic_or(CPUSTAT_GED
, &vcpu
->arch
.sie_block
->cpuflags
);
2009 kvm_s390_vcpu_setup_model(vcpu
);
2011 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2012 if (MACHINE_HAS_ESOP
)
2013 vcpu
->arch
.sie_block
->ecb
|= ECB_HOSTPROTINT
;
2014 if (test_kvm_facility(vcpu
->kvm
, 9))
2015 vcpu
->arch
.sie_block
->ecb
|= ECB_SRSI
;
2016 if (test_kvm_facility(vcpu
->kvm
, 73))
2017 vcpu
->arch
.sie_block
->ecb
|= ECB_TE
;
2019 if (test_kvm_facility(vcpu
->kvm
, 8) && sclp
.has_pfmfi
)
2020 vcpu
->arch
.sie_block
->ecb2
|= ECB2_PFMFI
;
2021 if (test_kvm_facility(vcpu
->kvm
, 130))
2022 vcpu
->arch
.sie_block
->ecb2
|= ECB2_IEP
;
2023 vcpu
->arch
.sie_block
->eca
= ECA_MVPGI
| ECA_PROTEXCI
;
2025 vcpu
->arch
.sie_block
->eca
|= ECA_CEI
;
2027 vcpu
->arch
.sie_block
->eca
|= ECA_IB
;
2029 vcpu
->arch
.sie_block
->eca
|= ECA_SII
;
2030 if (sclp
.has_sigpif
)
2031 vcpu
->arch
.sie_block
->eca
|= ECA_SIGPI
;
2032 if (test_kvm_facility(vcpu
->kvm
, 129)) {
2033 vcpu
->arch
.sie_block
->eca
|= ECA_VX
;
2034 vcpu
->arch
.sie_block
->ecd
|= ECD_HOSTREGMGMT
;
2036 vcpu
->arch
.sie_block
->sdnxo
= ((unsigned long) &vcpu
->run
->s
.regs
.sdnx
)
2038 vcpu
->arch
.sie_block
->riccbd
= (unsigned long) &vcpu
->run
->s
.regs
.riccb
;
2041 atomic_or(CPUSTAT_KSS
, &vcpu
->arch
.sie_block
->cpuflags
);
2043 vcpu
->arch
.sie_block
->ictl
|= ICTL_ISKE
| ICTL_SSKE
| ICTL_RRBE
;
2045 if (vcpu
->kvm
->arch
.use_cmma
) {
2046 rc
= kvm_s390_vcpu_setup_cmma(vcpu
);
2050 hrtimer_init(&vcpu
->arch
.ckc_timer
, CLOCK_MONOTONIC
, HRTIMER_MODE_REL
);
2051 vcpu
->arch
.ckc_timer
.function
= kvm_s390_idle_wakeup
;
2053 kvm_s390_vcpu_crypto_setup(vcpu
);
2058 struct kvm_vcpu
*kvm_arch_vcpu_create(struct kvm
*kvm
,
2061 struct kvm_vcpu
*vcpu
;
2062 struct sie_page
*sie_page
;
2065 if (!kvm_is_ucontrol(kvm
) && !sca_can_add_vcpu(kvm
, id
))
2070 vcpu
= kmem_cache_zalloc(kvm_vcpu_cache
, GFP_KERNEL
);
2074 sie_page
= (struct sie_page
*) get_zeroed_page(GFP_KERNEL
);
2078 vcpu
->arch
.sie_block
= &sie_page
->sie_block
;
2079 vcpu
->arch
.sie_block
->itdba
= (unsigned long) &sie_page
->itdb
;
2081 /* the real guest size will always be smaller than msl */
2082 vcpu
->arch
.sie_block
->mso
= 0;
2083 vcpu
->arch
.sie_block
->msl
= sclp
.hamax
;
2085 vcpu
->arch
.sie_block
->icpua
= id
;
2086 spin_lock_init(&vcpu
->arch
.local_int
.lock
);
2087 vcpu
->arch
.local_int
.float_int
= &kvm
->arch
.float_int
;
2088 vcpu
->arch
.local_int
.wq
= &vcpu
->wq
;
2089 vcpu
->arch
.local_int
.cpuflags
= &vcpu
->arch
.sie_block
->cpuflags
;
2090 seqcount_init(&vcpu
->arch
.cputm_seqcount
);
2092 rc
= kvm_vcpu_init(vcpu
, kvm
, id
);
2094 goto out_free_sie_block
;
2095 VM_EVENT(kvm
, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id
, vcpu
,
2096 vcpu
->arch
.sie_block
);
2097 trace_kvm_s390_create_vcpu(id
, vcpu
, vcpu
->arch
.sie_block
);
2101 free_page((unsigned long)(vcpu
->arch
.sie_block
));
2103 kmem_cache_free(kvm_vcpu_cache
, vcpu
);
2108 int kvm_arch_vcpu_runnable(struct kvm_vcpu
*vcpu
)
2110 return kvm_s390_vcpu_has_irq(vcpu
, 0);
2113 void kvm_s390_vcpu_block(struct kvm_vcpu
*vcpu
)
2115 atomic_or(PROG_BLOCK_SIE
, &vcpu
->arch
.sie_block
->prog20
);
2119 void kvm_s390_vcpu_unblock(struct kvm_vcpu
*vcpu
)
2121 atomic_andnot(PROG_BLOCK_SIE
, &vcpu
->arch
.sie_block
->prog20
);
2124 static void kvm_s390_vcpu_request(struct kvm_vcpu
*vcpu
)
2126 atomic_or(PROG_REQUEST
, &vcpu
->arch
.sie_block
->prog20
);
2130 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu
*vcpu
)
2132 atomic_andnot(PROG_REQUEST
, &vcpu
->arch
.sie_block
->prog20
);
2136 * Kick a guest cpu out of SIE and wait until SIE is not running.
2137 * If the CPU is not running (e.g. waiting as idle) the function will
2138 * return immediately. */
2139 void exit_sie(struct kvm_vcpu
*vcpu
)
2141 atomic_or(CPUSTAT_STOP_INT
, &vcpu
->arch
.sie_block
->cpuflags
);
2142 while (vcpu
->arch
.sie_block
->prog0c
& PROG_IN_SIE
)
2146 /* Kick a guest cpu out of SIE to process a request synchronously */
2147 void kvm_s390_sync_request(int req
, struct kvm_vcpu
*vcpu
)
2149 kvm_make_request(req
, vcpu
);
2150 kvm_s390_vcpu_request(vcpu
);
2153 static void kvm_gmap_notifier(struct gmap
*gmap
, unsigned long start
,
2156 struct kvm
*kvm
= gmap
->private;
2157 struct kvm_vcpu
*vcpu
;
2158 unsigned long prefix
;
2161 if (gmap_is_shadow(gmap
))
2163 if (start
>= 1UL << 31)
2164 /* We are only interested in prefix pages */
2166 kvm_for_each_vcpu(i
, vcpu
, kvm
) {
2167 /* match against both prefix pages */
2168 prefix
= kvm_s390_get_prefix(vcpu
);
2169 if (prefix
<= end
&& start
<= prefix
+ 2*PAGE_SIZE
- 1) {
2170 VCPU_EVENT(vcpu
, 2, "gmap notifier for %lx-%lx",
2172 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD
, vcpu
);
2177 int kvm_arch_vcpu_should_kick(struct kvm_vcpu
*vcpu
)
2179 /* kvm common code refers to this, but never calls it */
2184 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu
*vcpu
,
2185 struct kvm_one_reg
*reg
)
2190 case KVM_REG_S390_TODPR
:
2191 r
= put_user(vcpu
->arch
.sie_block
->todpr
,
2192 (u32 __user
*)reg
->addr
);
2194 case KVM_REG_S390_EPOCHDIFF
:
2195 r
= put_user(vcpu
->arch
.sie_block
->epoch
,
2196 (u64 __user
*)reg
->addr
);
2198 case KVM_REG_S390_CPU_TIMER
:
2199 r
= put_user(kvm_s390_get_cpu_timer(vcpu
),
2200 (u64 __user
*)reg
->addr
);
2202 case KVM_REG_S390_CLOCK_COMP
:
2203 r
= put_user(vcpu
->arch
.sie_block
->ckc
,
2204 (u64 __user
*)reg
->addr
);
2206 case KVM_REG_S390_PFTOKEN
:
2207 r
= put_user(vcpu
->arch
.pfault_token
,
2208 (u64 __user
*)reg
->addr
);
2210 case KVM_REG_S390_PFCOMPARE
:
2211 r
= put_user(vcpu
->arch
.pfault_compare
,
2212 (u64 __user
*)reg
->addr
);
2214 case KVM_REG_S390_PFSELECT
:
2215 r
= put_user(vcpu
->arch
.pfault_select
,
2216 (u64 __user
*)reg
->addr
);
2218 case KVM_REG_S390_PP
:
2219 r
= put_user(vcpu
->arch
.sie_block
->pp
,
2220 (u64 __user
*)reg
->addr
);
2222 case KVM_REG_S390_GBEA
:
2223 r
= put_user(vcpu
->arch
.sie_block
->gbea
,
2224 (u64 __user
*)reg
->addr
);
2233 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu
*vcpu
,
2234 struct kvm_one_reg
*reg
)
2240 case KVM_REG_S390_TODPR
:
2241 r
= get_user(vcpu
->arch
.sie_block
->todpr
,
2242 (u32 __user
*)reg
->addr
);
2244 case KVM_REG_S390_EPOCHDIFF
:
2245 r
= get_user(vcpu
->arch
.sie_block
->epoch
,
2246 (u64 __user
*)reg
->addr
);
2248 case KVM_REG_S390_CPU_TIMER
:
2249 r
= get_user(val
, (u64 __user
*)reg
->addr
);
2251 kvm_s390_set_cpu_timer(vcpu
, val
);
2253 case KVM_REG_S390_CLOCK_COMP
:
2254 r
= get_user(vcpu
->arch
.sie_block
->ckc
,
2255 (u64 __user
*)reg
->addr
);
2257 case KVM_REG_S390_PFTOKEN
:
2258 r
= get_user(vcpu
->arch
.pfault_token
,
2259 (u64 __user
*)reg
->addr
);
2260 if (vcpu
->arch
.pfault_token
== KVM_S390_PFAULT_TOKEN_INVALID
)
2261 kvm_clear_async_pf_completion_queue(vcpu
);
2263 case KVM_REG_S390_PFCOMPARE
:
2264 r
= get_user(vcpu
->arch
.pfault_compare
,
2265 (u64 __user
*)reg
->addr
);
2267 case KVM_REG_S390_PFSELECT
:
2268 r
= get_user(vcpu
->arch
.pfault_select
,
2269 (u64 __user
*)reg
->addr
);
2271 case KVM_REG_S390_PP
:
2272 r
= get_user(vcpu
->arch
.sie_block
->pp
,
2273 (u64 __user
*)reg
->addr
);
2275 case KVM_REG_S390_GBEA
:
2276 r
= get_user(vcpu
->arch
.sie_block
->gbea
,
2277 (u64 __user
*)reg
->addr
);
2286 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu
*vcpu
)
2288 kvm_s390_vcpu_initial_reset(vcpu
);
2292 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu
*vcpu
, struct kvm_regs
*regs
)
2294 memcpy(&vcpu
->run
->s
.regs
.gprs
, ®s
->gprs
, sizeof(regs
->gprs
));
2298 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu
*vcpu
, struct kvm_regs
*regs
)
2300 memcpy(®s
->gprs
, &vcpu
->run
->s
.regs
.gprs
, sizeof(regs
->gprs
));
2304 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu
*vcpu
,
2305 struct kvm_sregs
*sregs
)
2307 memcpy(&vcpu
->run
->s
.regs
.acrs
, &sregs
->acrs
, sizeof(sregs
->acrs
));
2308 memcpy(&vcpu
->arch
.sie_block
->gcr
, &sregs
->crs
, sizeof(sregs
->crs
));
2312 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu
*vcpu
,
2313 struct kvm_sregs
*sregs
)
2315 memcpy(&sregs
->acrs
, &vcpu
->run
->s
.regs
.acrs
, sizeof(sregs
->acrs
));
2316 memcpy(&sregs
->crs
, &vcpu
->arch
.sie_block
->gcr
, sizeof(sregs
->crs
));
2320 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu
*vcpu
, struct kvm_fpu
*fpu
)
2322 if (test_fp_ctl(fpu
->fpc
))
2324 vcpu
->run
->s
.regs
.fpc
= fpu
->fpc
;
2326 convert_fp_to_vx((__vector128
*) vcpu
->run
->s
.regs
.vrs
,
2327 (freg_t
*) fpu
->fprs
);
2329 memcpy(vcpu
->run
->s
.regs
.fprs
, &fpu
->fprs
, sizeof(fpu
->fprs
));
2333 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu
*vcpu
, struct kvm_fpu
*fpu
)
2335 /* make sure we have the latest values */
2338 convert_vx_to_fp((freg_t
*) fpu
->fprs
,
2339 (__vector128
*) vcpu
->run
->s
.regs
.vrs
);
2341 memcpy(fpu
->fprs
, vcpu
->run
->s
.regs
.fprs
, sizeof(fpu
->fprs
));
2342 fpu
->fpc
= vcpu
->run
->s
.regs
.fpc
;
2346 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu
*vcpu
, psw_t psw
)
2350 if (!is_vcpu_stopped(vcpu
))
2353 vcpu
->run
->psw_mask
= psw
.mask
;
2354 vcpu
->run
->psw_addr
= psw
.addr
;
2359 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu
*vcpu
,
2360 struct kvm_translation
*tr
)
2362 return -EINVAL
; /* not implemented yet */
2365 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2366 KVM_GUESTDBG_USE_HW_BP | \
2367 KVM_GUESTDBG_ENABLE)
2369 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu
*vcpu
,
2370 struct kvm_guest_debug
*dbg
)
2374 vcpu
->guest_debug
= 0;
2375 kvm_s390_clear_bp_data(vcpu
);
2377 if (dbg
->control
& ~VALID_GUESTDBG_FLAGS
)
2379 if (!sclp
.has_gpere
)
2382 if (dbg
->control
& KVM_GUESTDBG_ENABLE
) {
2383 vcpu
->guest_debug
= dbg
->control
;
2384 /* enforce guest PER */
2385 atomic_or(CPUSTAT_P
, &vcpu
->arch
.sie_block
->cpuflags
);
2387 if (dbg
->control
& KVM_GUESTDBG_USE_HW_BP
)
2388 rc
= kvm_s390_import_bp_data(vcpu
, dbg
);
2390 atomic_andnot(CPUSTAT_P
, &vcpu
->arch
.sie_block
->cpuflags
);
2391 vcpu
->arch
.guestdbg
.last_bp
= 0;
2395 vcpu
->guest_debug
= 0;
2396 kvm_s390_clear_bp_data(vcpu
);
2397 atomic_andnot(CPUSTAT_P
, &vcpu
->arch
.sie_block
->cpuflags
);
2403 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu
*vcpu
,
2404 struct kvm_mp_state
*mp_state
)
2406 /* CHECK_STOP and LOAD are not supported yet */
2407 return is_vcpu_stopped(vcpu
) ? KVM_MP_STATE_STOPPED
:
2408 KVM_MP_STATE_OPERATING
;
2411 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu
*vcpu
,
2412 struct kvm_mp_state
*mp_state
)
2416 /* user space knows about this interface - let it control the state */
2417 vcpu
->kvm
->arch
.user_cpu_state_ctrl
= 1;
2419 switch (mp_state
->mp_state
) {
2420 case KVM_MP_STATE_STOPPED
:
2421 kvm_s390_vcpu_stop(vcpu
);
2423 case KVM_MP_STATE_OPERATING
:
2424 kvm_s390_vcpu_start(vcpu
);
2426 case KVM_MP_STATE_LOAD
:
2427 case KVM_MP_STATE_CHECK_STOP
:
2428 /* fall through - CHECK_STOP and LOAD are not supported yet */
2436 static bool ibs_enabled(struct kvm_vcpu
*vcpu
)
2438 return atomic_read(&vcpu
->arch
.sie_block
->cpuflags
) & CPUSTAT_IBS
;
2441 static int kvm_s390_handle_requests(struct kvm_vcpu
*vcpu
)
2444 kvm_s390_vcpu_request_handled(vcpu
);
2445 if (!vcpu
->requests
)
2448 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2449 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2450 * This ensures that the ipte instruction for this request has
2451 * already finished. We might race against a second unmapper that
2452 * wants to set the blocking bit. Lets just retry the request loop.
2454 if (kvm_check_request(KVM_REQ_MMU_RELOAD
, vcpu
)) {
2456 rc
= gmap_mprotect_notify(vcpu
->arch
.gmap
,
2457 kvm_s390_get_prefix(vcpu
),
2458 PAGE_SIZE
* 2, PROT_WRITE
);
2460 kvm_make_request(KVM_REQ_MMU_RELOAD
, vcpu
);
2466 if (kvm_check_request(KVM_REQ_TLB_FLUSH
, vcpu
)) {
2467 vcpu
->arch
.sie_block
->ihcpu
= 0xffff;
2471 if (kvm_check_request(KVM_REQ_ENABLE_IBS
, vcpu
)) {
2472 if (!ibs_enabled(vcpu
)) {
2473 trace_kvm_s390_enable_disable_ibs(vcpu
->vcpu_id
, 1);
2474 atomic_or(CPUSTAT_IBS
,
2475 &vcpu
->arch
.sie_block
->cpuflags
);
2480 if (kvm_check_request(KVM_REQ_DISABLE_IBS
, vcpu
)) {
2481 if (ibs_enabled(vcpu
)) {
2482 trace_kvm_s390_enable_disable_ibs(vcpu
->vcpu_id
, 0);
2483 atomic_andnot(CPUSTAT_IBS
,
2484 &vcpu
->arch
.sie_block
->cpuflags
);
2489 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC
, vcpu
)) {
2490 vcpu
->arch
.sie_block
->ictl
|= ICTL_OPEREXC
;
2494 /* nothing to do, just clear the request */
2495 clear_bit(KVM_REQ_UNHALT
, &vcpu
->requests
);
2500 void kvm_s390_set_tod_clock(struct kvm
*kvm
, u64 tod
)
2502 struct kvm_vcpu
*vcpu
;
2505 mutex_lock(&kvm
->lock
);
2507 kvm
->arch
.epoch
= tod
- get_tod_clock();
2508 kvm_s390_vcpu_block_all(kvm
);
2509 kvm_for_each_vcpu(i
, vcpu
, kvm
)
2510 vcpu
->arch
.sie_block
->epoch
= kvm
->arch
.epoch
;
2511 kvm_s390_vcpu_unblock_all(kvm
);
2513 mutex_unlock(&kvm
->lock
);
2517 * kvm_arch_fault_in_page - fault-in guest page if necessary
2518 * @vcpu: The corresponding virtual cpu
2519 * @gpa: Guest physical address
2520 * @writable: Whether the page should be writable or not
2522 * Make sure that a guest page has been faulted-in on the host.
2524 * Return: Zero on success, negative error code otherwise.
2526 long kvm_arch_fault_in_page(struct kvm_vcpu
*vcpu
, gpa_t gpa
, int writable
)
2528 return gmap_fault(vcpu
->arch
.gmap
, gpa
,
2529 writable
? FAULT_FLAG_WRITE
: 0);
2532 static void __kvm_inject_pfault_token(struct kvm_vcpu
*vcpu
, bool start_token
,
2533 unsigned long token
)
2535 struct kvm_s390_interrupt inti
;
2536 struct kvm_s390_irq irq
;
2539 irq
.u
.ext
.ext_params2
= token
;
2540 irq
.type
= KVM_S390_INT_PFAULT_INIT
;
2541 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu
, &irq
));
2543 inti
.type
= KVM_S390_INT_PFAULT_DONE
;
2544 inti
.parm64
= token
;
2545 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu
->kvm
, &inti
));
2549 void kvm_arch_async_page_not_present(struct kvm_vcpu
*vcpu
,
2550 struct kvm_async_pf
*work
)
2552 trace_kvm_s390_pfault_init(vcpu
, work
->arch
.pfault_token
);
2553 __kvm_inject_pfault_token(vcpu
, true, work
->arch
.pfault_token
);
2556 void kvm_arch_async_page_present(struct kvm_vcpu
*vcpu
,
2557 struct kvm_async_pf
*work
)
2559 trace_kvm_s390_pfault_done(vcpu
, work
->arch
.pfault_token
);
2560 __kvm_inject_pfault_token(vcpu
, false, work
->arch
.pfault_token
);
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}
2569 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu
*vcpu
)
2572 * s390 will always inject the page directly,
2573 * but we still want check_async_completion to cleanup
2578 static int kvm_arch_setup_async_pf(struct kvm_vcpu
*vcpu
)
2581 struct kvm_arch_async_pf arch
;
2584 if (vcpu
->arch
.pfault_token
== KVM_S390_PFAULT_TOKEN_INVALID
)
2586 if ((vcpu
->arch
.sie_block
->gpsw
.mask
& vcpu
->arch
.pfault_select
) !=
2587 vcpu
->arch
.pfault_compare
)
2589 if (psw_extint_disabled(vcpu
))
2591 if (kvm_s390_vcpu_has_irq(vcpu
, 0))
2593 if (!(vcpu
->arch
.sie_block
->gcr
[0] & 0x200ul
))
2595 if (!vcpu
->arch
.gmap
->pfault_enabled
)
2598 hva
= gfn_to_hva(vcpu
->kvm
, gpa_to_gfn(current
->thread
.gmap_addr
));
2599 hva
+= current
->thread
.gmap_addr
& ~PAGE_MASK
;
2600 if (read_guest_real(vcpu
, vcpu
->arch
.pfault_token
, &arch
.pfault_token
, 8))
2603 rc
= kvm_setup_async_pf(vcpu
, current
->thread
.gmap_addr
, hva
, &arch
);
2607 static int vcpu_pre_run(struct kvm_vcpu
*vcpu
)
2612 * On s390 notifications for arriving pages will be delivered directly
2613 * to the guest but the house keeping for completed pfaults is
2614 * handled outside the worker.
2616 kvm_check_async_pf_completion(vcpu
);
2618 vcpu
->arch
.sie_block
->gg14
= vcpu
->run
->s
.regs
.gprs
[14];
2619 vcpu
->arch
.sie_block
->gg15
= vcpu
->run
->s
.regs
.gprs
[15];
2624 if (test_cpu_flag(CIF_MCCK_PENDING
))
2627 if (!kvm_is_ucontrol(vcpu
->kvm
)) {
2628 rc
= kvm_s390_deliver_pending_interrupts(vcpu
);
2633 rc
= kvm_s390_handle_requests(vcpu
);
2637 if (guestdbg_enabled(vcpu
)) {
2638 kvm_s390_backup_guest_per_regs(vcpu
);
2639 kvm_s390_patch_guest_per_regs(vcpu
);
2642 vcpu
->arch
.sie_block
->icptcode
= 0;
2643 cpuflags
= atomic_read(&vcpu
->arch
.sie_block
->cpuflags
);
2644 VCPU_EVENT(vcpu
, 6, "entering sie flags %x", cpuflags
);
2645 trace_kvm_s390_sie_enter(vcpu
, cpuflags
);
2650 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu
*vcpu
)
2652 struct kvm_s390_pgm_info pgm_info
= {
2653 .code
= PGM_ADDRESSING
,
2658 VCPU_EVENT(vcpu
, 3, "%s", "fault in sie instruction");
2659 trace_kvm_s390_sie_fault(vcpu
);
2662 * We want to inject an addressing exception, which is defined as a
2663 * suppressing or terminating exception. However, since we came here
2664 * by a DAT access exception, the PSW still points to the faulting
2665 * instruction since DAT exceptions are nullifying. So we've got
2666 * to look up the current opcode to get the length of the instruction
2667 * to be able to forward the PSW.
2669 rc
= read_guest_instr(vcpu
, vcpu
->arch
.sie_block
->gpsw
.addr
, &opcode
, 1);
2670 ilen
= insn_length(opcode
);
2674 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2675 * Forward by arbitrary ilc, injection will take care of
2676 * nullification if necessary.
2678 pgm_info
= vcpu
->arch
.pgm
;
2681 pgm_info
.flags
= ilen
| KVM_S390_PGM_FLAGS_ILC_VALID
;
2682 kvm_s390_forward_psw(vcpu
, ilen
);
2683 return kvm_s390_inject_prog_irq(vcpu
, &pgm_info
);
2686 static int vcpu_post_run(struct kvm_vcpu
*vcpu
, int exit_reason
)
2688 VCPU_EVENT(vcpu
, 6, "exit sie icptcode %d",
2689 vcpu
->arch
.sie_block
->icptcode
);
2690 trace_kvm_s390_sie_exit(vcpu
, vcpu
->arch
.sie_block
->icptcode
);
2692 if (guestdbg_enabled(vcpu
))
2693 kvm_s390_restore_guest_per_regs(vcpu
);
2695 vcpu
->run
->s
.regs
.gprs
[14] = vcpu
->arch
.sie_block
->gg14
;
2696 vcpu
->run
->s
.regs
.gprs
[15] = vcpu
->arch
.sie_block
->gg15
;
2698 if (vcpu
->arch
.sie_block
->icptcode
> 0) {
2699 int rc
= kvm_handle_sie_intercept(vcpu
);
2701 if (rc
!= -EOPNOTSUPP
)
2703 vcpu
->run
->exit_reason
= KVM_EXIT_S390_SIEIC
;
2704 vcpu
->run
->s390_sieic
.icptcode
= vcpu
->arch
.sie_block
->icptcode
;
2705 vcpu
->run
->s390_sieic
.ipa
= vcpu
->arch
.sie_block
->ipa
;
2706 vcpu
->run
->s390_sieic
.ipb
= vcpu
->arch
.sie_block
->ipb
;
2708 } else if (exit_reason
!= -EFAULT
) {
2709 vcpu
->stat
.exit_null
++;
2711 } else if (kvm_is_ucontrol(vcpu
->kvm
)) {
2712 vcpu
->run
->exit_reason
= KVM_EXIT_S390_UCONTROL
;
2713 vcpu
->run
->s390_ucontrol
.trans_exc_code
=
2714 current
->thread
.gmap_addr
;
2715 vcpu
->run
->s390_ucontrol
.pgm_code
= 0x10;
2717 } else if (current
->thread
.gmap_pfault
) {
2718 trace_kvm_s390_major_guest_pfault(vcpu
);
2719 current
->thread
.gmap_pfault
= 0;
2720 if (kvm_arch_setup_async_pf(vcpu
))
2722 return kvm_arch_fault_in_page(vcpu
, current
->thread
.gmap_addr
, 1);
2724 return vcpu_post_run_fault_in_sie(vcpu
);
2727 static int __vcpu_run(struct kvm_vcpu
*vcpu
)
2729 int rc
, exit_reason
;
2732 * We try to hold kvm->srcu during most of vcpu_run (except when run-
2733 * ning the guest), so that memslots (and other stuff) are protected
2735 vcpu
->srcu_idx
= srcu_read_lock(&vcpu
->kvm
->srcu
);
2738 rc
= vcpu_pre_run(vcpu
);
2742 srcu_read_unlock(&vcpu
->kvm
->srcu
, vcpu
->srcu_idx
);
2744 * As PF_VCPU will be used in fault handler, between
2745 * guest_enter and guest_exit should be no uaccess.
2747 local_irq_disable();
2748 guest_enter_irqoff();
2749 __disable_cpu_timer_accounting(vcpu
);
2751 exit_reason
= sie64a(vcpu
->arch
.sie_block
,
2752 vcpu
->run
->s
.regs
.gprs
);
2753 local_irq_disable();
2754 __enable_cpu_timer_accounting(vcpu
);
2755 guest_exit_irqoff();
2757 vcpu
->srcu_idx
= srcu_read_lock(&vcpu
->kvm
->srcu
);
2759 rc
= vcpu_post_run(vcpu
, exit_reason
);
2760 } while (!signal_pending(current
) && !guestdbg_exit_pending(vcpu
) && !rc
);
2762 srcu_read_unlock(&vcpu
->kvm
->srcu
, vcpu
->srcu_idx
);
2766 static void sync_regs(struct kvm_vcpu
*vcpu
, struct kvm_run
*kvm_run
)
2768 struct runtime_instr_cb
*riccb
;
2771 riccb
= (struct runtime_instr_cb
*) &kvm_run
->s
.regs
.riccb
;
2772 gscb
= (struct gs_cb
*) &kvm_run
->s
.regs
.gscb
;
2773 vcpu
->arch
.sie_block
->gpsw
.mask
= kvm_run
->psw_mask
;
2774 vcpu
->arch
.sie_block
->gpsw
.addr
= kvm_run
->psw_addr
;
2775 if (kvm_run
->kvm_dirty_regs
& KVM_SYNC_PREFIX
)
2776 kvm_s390_set_prefix(vcpu
, kvm_run
->s
.regs
.prefix
);
2777 if (kvm_run
->kvm_dirty_regs
& KVM_SYNC_CRS
) {
2778 memcpy(&vcpu
->arch
.sie_block
->gcr
, &kvm_run
->s
.regs
.crs
, 128);
2779 /* some control register changes require a tlb flush */
2780 kvm_make_request(KVM_REQ_TLB_FLUSH
, vcpu
);
2782 if (kvm_run
->kvm_dirty_regs
& KVM_SYNC_ARCH0
) {
2783 kvm_s390_set_cpu_timer(vcpu
, kvm_run
->s
.regs
.cputm
);
2784 vcpu
->arch
.sie_block
->ckc
= kvm_run
->s
.regs
.ckc
;
2785 vcpu
->arch
.sie_block
->todpr
= kvm_run
->s
.regs
.todpr
;
2786 vcpu
->arch
.sie_block
->pp
= kvm_run
->s
.regs
.pp
;
2787 vcpu
->arch
.sie_block
->gbea
= kvm_run
->s
.regs
.gbea
;
2789 if (kvm_run
->kvm_dirty_regs
& KVM_SYNC_PFAULT
) {
2790 vcpu
->arch
.pfault_token
= kvm_run
->s
.regs
.pft
;
2791 vcpu
->arch
.pfault_select
= kvm_run
->s
.regs
.pfs
;
2792 vcpu
->arch
.pfault_compare
= kvm_run
->s
.regs
.pfc
;
2793 if (vcpu
->arch
.pfault_token
== KVM_S390_PFAULT_TOKEN_INVALID
)
2794 kvm_clear_async_pf_completion_queue(vcpu
);
2797 * If userspace sets the riccb (e.g. after migration) to a valid state,
2798 * we should enable RI here instead of doing the lazy enablement.
2800 if ((kvm_run
->kvm_dirty_regs
& KVM_SYNC_RICCB
) &&
2801 test_kvm_facility(vcpu
->kvm
, 64) &&
2803 !(vcpu
->arch
.sie_block
->ecb3
& ECB3_RI
)) {
2804 VCPU_EVENT(vcpu
, 3, "%s", "ENABLE: RI (sync_regs)");
2805 vcpu
->arch
.sie_block
->ecb3
|= ECB3_RI
;
2808 * If userspace sets the gscb (e.g. after migration) to non-zero,
2809 * we should enable GS here instead of doing the lazy enablement.
2811 if ((kvm_run
->kvm_dirty_regs
& KVM_SYNC_GSCB
) &&
2812 test_kvm_facility(vcpu
->kvm
, 133) &&
2814 !vcpu
->arch
.gs_enabled
) {
2815 VCPU_EVENT(vcpu
, 3, "%s", "ENABLE: GS (sync_regs)");
2816 vcpu
->arch
.sie_block
->ecb
|= ECB_GS
;
2817 vcpu
->arch
.sie_block
->ecd
|= ECD_HOSTREGMGMT
;
2818 vcpu
->arch
.gs_enabled
= 1;
2820 save_access_regs(vcpu
->arch
.host_acrs
);
2821 restore_access_regs(vcpu
->run
->s
.regs
.acrs
);
2822 /* save host (userspace) fprs/vrs */
2824 vcpu
->arch
.host_fpregs
.fpc
= current
->thread
.fpu
.fpc
;
2825 vcpu
->arch
.host_fpregs
.regs
= current
->thread
.fpu
.regs
;
2827 current
->thread
.fpu
.regs
= vcpu
->run
->s
.regs
.vrs
;
2829 current
->thread
.fpu
.regs
= vcpu
->run
->s
.regs
.fprs
;
2830 current
->thread
.fpu
.fpc
= vcpu
->run
->s
.regs
.fpc
;
2831 if (test_fp_ctl(current
->thread
.fpu
.fpc
))
2832 /* User space provided an invalid FPC, let's clear it */
2833 current
->thread
.fpu
.fpc
= 0;
2834 if (MACHINE_HAS_GS
) {
2836 __ctl_set_bit(2, 4);
2837 if (current
->thread
.gs_cb
) {
2838 vcpu
->arch
.host_gscb
= current
->thread
.gs_cb
;
2839 save_gs_cb(vcpu
->arch
.host_gscb
);
2841 if (vcpu
->arch
.gs_enabled
) {
2842 current
->thread
.gs_cb
= (struct gs_cb
*)
2843 &vcpu
->run
->s
.regs
.gscb
;
2844 restore_gs_cb(current
->thread
.gs_cb
);
2849 kvm_run
->kvm_dirty_regs
= 0;
2852 static void store_regs(struct kvm_vcpu
*vcpu
, struct kvm_run
*kvm_run
)
2854 kvm_run
->psw_mask
= vcpu
->arch
.sie_block
->gpsw
.mask
;
2855 kvm_run
->psw_addr
= vcpu
->arch
.sie_block
->gpsw
.addr
;
2856 kvm_run
->s
.regs
.prefix
= kvm_s390_get_prefix(vcpu
);
2857 memcpy(&kvm_run
->s
.regs
.crs
, &vcpu
->arch
.sie_block
->gcr
, 128);
2858 kvm_run
->s
.regs
.cputm
= kvm_s390_get_cpu_timer(vcpu
);
2859 kvm_run
->s
.regs
.ckc
= vcpu
->arch
.sie_block
->ckc
;
2860 kvm_run
->s
.regs
.todpr
= vcpu
->arch
.sie_block
->todpr
;
2861 kvm_run
->s
.regs
.pp
= vcpu
->arch
.sie_block
->pp
;
2862 kvm_run
->s
.regs
.gbea
= vcpu
->arch
.sie_block
->gbea
;
2863 kvm_run
->s
.regs
.pft
= vcpu
->arch
.pfault_token
;
2864 kvm_run
->s
.regs
.pfs
= vcpu
->arch
.pfault_select
;
2865 kvm_run
->s
.regs
.pfc
= vcpu
->arch
.pfault_compare
;
2866 save_access_regs(vcpu
->run
->s
.regs
.acrs
);
2867 restore_access_regs(vcpu
->arch
.host_acrs
);
2868 /* Save guest register state */
2870 vcpu
->run
->s
.regs
.fpc
= current
->thread
.fpu
.fpc
;
2871 /* Restore will be done lazily at return */
2872 current
->thread
.fpu
.fpc
= vcpu
->arch
.host_fpregs
.fpc
;
2873 current
->thread
.fpu
.regs
= vcpu
->arch
.host_fpregs
.regs
;
2874 if (MACHINE_HAS_GS
) {
2875 __ctl_set_bit(2, 4);
2876 if (vcpu
->arch
.gs_enabled
)
2877 save_gs_cb(current
->thread
.gs_cb
);
2879 current
->thread
.gs_cb
= vcpu
->arch
.host_gscb
;
2880 restore_gs_cb(vcpu
->arch
.host_gscb
);
2882 if (!vcpu
->arch
.host_gscb
)
2883 __ctl_clear_bit(2, 4);
2884 vcpu
->arch
.host_gscb
= NULL
;
2889 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu
*vcpu
, struct kvm_run
*kvm_run
)
2894 if (kvm_run
->immediate_exit
)
2897 if (guestdbg_exit_pending(vcpu
)) {
2898 kvm_s390_prepare_debug_exit(vcpu
);
2902 if (vcpu
->sigset_active
)
2903 sigprocmask(SIG_SETMASK
, &vcpu
->sigset
, &sigsaved
);
2905 if (!kvm_s390_user_cpu_state_ctrl(vcpu
->kvm
)) {
2906 kvm_s390_vcpu_start(vcpu
);
2907 } else if (is_vcpu_stopped(vcpu
)) {
2908 pr_err_ratelimited("can't run stopped vcpu %d\n",
2913 sync_regs(vcpu
, kvm_run
);
2914 enable_cpu_timer_accounting(vcpu
);
2917 rc
= __vcpu_run(vcpu
);
2919 if (signal_pending(current
) && !rc
) {
2920 kvm_run
->exit_reason
= KVM_EXIT_INTR
;
2924 if (guestdbg_exit_pending(vcpu
) && !rc
) {
2925 kvm_s390_prepare_debug_exit(vcpu
);
2929 if (rc
== -EREMOTE
) {
2930 /* userspace support is needed, kvm_run has been prepared */
2934 disable_cpu_timer_accounting(vcpu
);
2935 store_regs(vcpu
, kvm_run
);
2937 if (vcpu
->sigset_active
)
2938 sigprocmask(SIG_SETMASK
, &sigsaved
, NULL
);
2940 vcpu
->stat
.exit_userspace
++;
2945 * store status at address
2946 * we use have two special cases:
2947 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2948 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2950 int kvm_s390_store_status_unloaded(struct kvm_vcpu
*vcpu
, unsigned long gpa
)
2952 unsigned char archmode
= 1;
2953 freg_t fprs
[NUM_FPRS
];
2958 px
= kvm_s390_get_prefix(vcpu
);
2959 if (gpa
== KVM_S390_STORE_STATUS_NOADDR
) {
2960 if (write_guest_abs(vcpu
, 163, &archmode
, 1))
2963 } else if (gpa
== KVM_S390_STORE_STATUS_PREFIXED
) {
2964 if (write_guest_real(vcpu
, 163, &archmode
, 1))
2968 gpa
-= __LC_FPREGS_SAVE_AREA
;
2970 /* manually convert vector registers if necessary */
2971 if (MACHINE_HAS_VX
) {
2972 convert_vx_to_fp(fprs
, (__vector128
*) vcpu
->run
->s
.regs
.vrs
);
2973 rc
= write_guest_abs(vcpu
, gpa
+ __LC_FPREGS_SAVE_AREA
,
2976 rc
= write_guest_abs(vcpu
, gpa
+ __LC_FPREGS_SAVE_AREA
,
2977 vcpu
->run
->s
.regs
.fprs
, 128);
2979 rc
|= write_guest_abs(vcpu
, gpa
+ __LC_GPREGS_SAVE_AREA
,
2980 vcpu
->run
->s
.regs
.gprs
, 128);
2981 rc
|= write_guest_abs(vcpu
, gpa
+ __LC_PSW_SAVE_AREA
,
2982 &vcpu
->arch
.sie_block
->gpsw
, 16);
2983 rc
|= write_guest_abs(vcpu
, gpa
+ __LC_PREFIX_SAVE_AREA
,
2985 rc
|= write_guest_abs(vcpu
, gpa
+ __LC_FP_CREG_SAVE_AREA
,
2986 &vcpu
->run
->s
.regs
.fpc
, 4);
2987 rc
|= write_guest_abs(vcpu
, gpa
+ __LC_TOD_PROGREG_SAVE_AREA
,
2988 &vcpu
->arch
.sie_block
->todpr
, 4);
2989 cputm
= kvm_s390_get_cpu_timer(vcpu
);
2990 rc
|= write_guest_abs(vcpu
, gpa
+ __LC_CPU_TIMER_SAVE_AREA
,
2992 clkcomp
= vcpu
->arch
.sie_block
->ckc
>> 8;
2993 rc
|= write_guest_abs(vcpu
, gpa
+ __LC_CLOCK_COMP_SAVE_AREA
,
2995 rc
|= write_guest_abs(vcpu
, gpa
+ __LC_AREGS_SAVE_AREA
,
2996 &vcpu
->run
->s
.regs
.acrs
, 64);
2997 rc
|= write_guest_abs(vcpu
, gpa
+ __LC_CREGS_SAVE_AREA
,
2998 &vcpu
->arch
.sie_block
->gcr
, 128);
2999 return rc
? -EFAULT
: 0;
3002 int kvm_s390_vcpu_store_status(struct kvm_vcpu
*vcpu
, unsigned long addr
)
3005 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3006 * switch in the run ioctl. Let's update our copies before we save
3007 * it into the save area
3010 vcpu
->run
->s
.regs
.fpc
= current
->thread
.fpu
.fpc
;
3011 save_access_regs(vcpu
->run
->s
.regs
.acrs
);
3013 return kvm_s390_store_status_unloaded(vcpu
, addr
);
3016 static void __disable_ibs_on_vcpu(struct kvm_vcpu
*vcpu
)
3018 kvm_check_request(KVM_REQ_ENABLE_IBS
, vcpu
);
3019 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS
, vcpu
);
3022 static void __disable_ibs_on_all_vcpus(struct kvm
*kvm
)
3025 struct kvm_vcpu
*vcpu
;
3027 kvm_for_each_vcpu(i
, vcpu
, kvm
) {
3028 __disable_ibs_on_vcpu(vcpu
);
3032 static void __enable_ibs_on_vcpu(struct kvm_vcpu
*vcpu
)
3036 kvm_check_request(KVM_REQ_DISABLE_IBS
, vcpu
);
3037 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS
, vcpu
);
3040 void kvm_s390_vcpu_start(struct kvm_vcpu
*vcpu
)
3042 int i
, online_vcpus
, started_vcpus
= 0;
3044 if (!is_vcpu_stopped(vcpu
))
3047 trace_kvm_s390_vcpu_start_stop(vcpu
->vcpu_id
, 1);
3048 /* Only one cpu at a time may enter/leave the STOPPED state. */
3049 spin_lock(&vcpu
->kvm
->arch
.start_stop_lock
);
3050 online_vcpus
= atomic_read(&vcpu
->kvm
->online_vcpus
);
3052 for (i
= 0; i
< online_vcpus
; i
++) {
3053 if (!is_vcpu_stopped(vcpu
->kvm
->vcpus
[i
]))
3057 if (started_vcpus
== 0) {
3058 /* we're the only active VCPU -> speed it up */
3059 __enable_ibs_on_vcpu(vcpu
);
3060 } else if (started_vcpus
== 1) {
3062 * As we are starting a second VCPU, we have to disable
3063 * the IBS facility on all VCPUs to remove potentially
3064 * oustanding ENABLE requests.
3066 __disable_ibs_on_all_vcpus(vcpu
->kvm
);
3069 atomic_andnot(CPUSTAT_STOPPED
, &vcpu
->arch
.sie_block
->cpuflags
);
3071 * Another VCPU might have used IBS while we were offline.
3072 * Let's play safe and flush the VCPU at startup.
3074 kvm_make_request(KVM_REQ_TLB_FLUSH
, vcpu
);
3075 spin_unlock(&vcpu
->kvm
->arch
.start_stop_lock
);
3079 void kvm_s390_vcpu_stop(struct kvm_vcpu
*vcpu
)
3081 int i
, online_vcpus
, started_vcpus
= 0;
3082 struct kvm_vcpu
*started_vcpu
= NULL
;
3084 if (is_vcpu_stopped(vcpu
))
3087 trace_kvm_s390_vcpu_start_stop(vcpu
->vcpu_id
, 0);
3088 /* Only one cpu at a time may enter/leave the STOPPED state. */
3089 spin_lock(&vcpu
->kvm
->arch
.start_stop_lock
);
3090 online_vcpus
= atomic_read(&vcpu
->kvm
->online_vcpus
);
3092 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3093 kvm_s390_clear_stop_irq(vcpu
);
3095 atomic_or(CPUSTAT_STOPPED
, &vcpu
->arch
.sie_block
->cpuflags
);
3096 __disable_ibs_on_vcpu(vcpu
);
3098 for (i
= 0; i
< online_vcpus
; i
++) {
3099 if (!is_vcpu_stopped(vcpu
->kvm
->vcpus
[i
])) {
3101 started_vcpu
= vcpu
->kvm
->vcpus
[i
];
3105 if (started_vcpus
== 1) {
3107 * As we only have one VCPU left, we want to enable the
3108 * IBS facility for that VCPU to speed it up.
3110 __enable_ibs_on_vcpu(started_vcpu
);
3113 spin_unlock(&vcpu
->kvm
->arch
.start_stop_lock
);
3117 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu
*vcpu
,
3118 struct kvm_enable_cap
*cap
)
3126 case KVM_CAP_S390_CSS_SUPPORT
:
3127 if (!vcpu
->kvm
->arch
.css_support
) {
3128 vcpu
->kvm
->arch
.css_support
= 1;
3129 VM_EVENT(vcpu
->kvm
, 3, "%s", "ENABLE: CSS support");
3130 trace_kvm_s390_enable_css(vcpu
->kvm
);
3141 static long kvm_s390_guest_mem_op(struct kvm_vcpu
*vcpu
,
3142 struct kvm_s390_mem_op
*mop
)
3144 void __user
*uaddr
= (void __user
*)mop
->buf
;
3145 void *tmpbuf
= NULL
;
3147 const u64 supported_flags
= KVM_S390_MEMOP_F_INJECT_EXCEPTION
3148 | KVM_S390_MEMOP_F_CHECK_ONLY
;
3150 if (mop
->flags
& ~supported_flags
)
3153 if (mop
->size
> MEM_OP_MAX_SIZE
)
3156 if (!(mop
->flags
& KVM_S390_MEMOP_F_CHECK_ONLY
)) {
3157 tmpbuf
= vmalloc(mop
->size
);
3162 srcu_idx
= srcu_read_lock(&vcpu
->kvm
->srcu
);
3165 case KVM_S390_MEMOP_LOGICAL_READ
:
3166 if (mop
->flags
& KVM_S390_MEMOP_F_CHECK_ONLY
) {
3167 r
= check_gva_range(vcpu
, mop
->gaddr
, mop
->ar
,
3168 mop
->size
, GACC_FETCH
);
3171 r
= read_guest(vcpu
, mop
->gaddr
, mop
->ar
, tmpbuf
, mop
->size
);
3173 if (copy_to_user(uaddr
, tmpbuf
, mop
->size
))
3177 case KVM_S390_MEMOP_LOGICAL_WRITE
:
3178 if (mop
->flags
& KVM_S390_MEMOP_F_CHECK_ONLY
) {
3179 r
= check_gva_range(vcpu
, mop
->gaddr
, mop
->ar
,
3180 mop
->size
, GACC_STORE
);
3183 if (copy_from_user(tmpbuf
, uaddr
, mop
->size
)) {
3187 r
= write_guest(vcpu
, mop
->gaddr
, mop
->ar
, tmpbuf
, mop
->size
);
3193 srcu_read_unlock(&vcpu
->kvm
->srcu
, srcu_idx
);
3195 if (r
> 0 && (mop
->flags
& KVM_S390_MEMOP_F_INJECT_EXCEPTION
) != 0)
3196 kvm_s390_inject_prog_irq(vcpu
, &vcpu
->arch
.pgm
);
3202 long kvm_arch_vcpu_ioctl(struct file
*filp
,
3203 unsigned int ioctl
, unsigned long arg
)
3205 struct kvm_vcpu
*vcpu
= filp
->private_data
;
3206 void __user
*argp
= (void __user
*)arg
;
3211 case KVM_S390_IRQ
: {
3212 struct kvm_s390_irq s390irq
;
3215 if (copy_from_user(&s390irq
, argp
, sizeof(s390irq
)))
3217 r
= kvm_s390_inject_vcpu(vcpu
, &s390irq
);
3220 case KVM_S390_INTERRUPT
: {
3221 struct kvm_s390_interrupt s390int
;
3222 struct kvm_s390_irq s390irq
;
3225 if (copy_from_user(&s390int
, argp
, sizeof(s390int
)))
3227 if (s390int_to_s390irq(&s390int
, &s390irq
))
3229 r
= kvm_s390_inject_vcpu(vcpu
, &s390irq
);
3232 case KVM_S390_STORE_STATUS
:
3233 idx
= srcu_read_lock(&vcpu
->kvm
->srcu
);
3234 r
= kvm_s390_vcpu_store_status(vcpu
, arg
);
3235 srcu_read_unlock(&vcpu
->kvm
->srcu
, idx
);
3237 case KVM_S390_SET_INITIAL_PSW
: {
3241 if (copy_from_user(&psw
, argp
, sizeof(psw
)))
3243 r
= kvm_arch_vcpu_ioctl_set_initial_psw(vcpu
, psw
);
3246 case KVM_S390_INITIAL_RESET
:
3247 r
= kvm_arch_vcpu_ioctl_initial_reset(vcpu
);
3249 case KVM_SET_ONE_REG
:
3250 case KVM_GET_ONE_REG
: {
3251 struct kvm_one_reg reg
;
3253 if (copy_from_user(®
, argp
, sizeof(reg
)))
3255 if (ioctl
== KVM_SET_ONE_REG
)
3256 r
= kvm_arch_vcpu_ioctl_set_one_reg(vcpu
, ®
);
3258 r
= kvm_arch_vcpu_ioctl_get_one_reg(vcpu
, ®
);
3261 #ifdef CONFIG_KVM_S390_UCONTROL
3262 case KVM_S390_UCAS_MAP
: {
3263 struct kvm_s390_ucas_mapping ucasmap
;
3265 if (copy_from_user(&ucasmap
, argp
, sizeof(ucasmap
))) {
3270 if (!kvm_is_ucontrol(vcpu
->kvm
)) {
3275 r
= gmap_map_segment(vcpu
->arch
.gmap
, ucasmap
.user_addr
,
3276 ucasmap
.vcpu_addr
, ucasmap
.length
);
3279 case KVM_S390_UCAS_UNMAP
: {
3280 struct kvm_s390_ucas_mapping ucasmap
;
3282 if (copy_from_user(&ucasmap
, argp
, sizeof(ucasmap
))) {
3287 if (!kvm_is_ucontrol(vcpu
->kvm
)) {
3292 r
= gmap_unmap_segment(vcpu
->arch
.gmap
, ucasmap
.vcpu_addr
,
3297 case KVM_S390_VCPU_FAULT
: {
3298 r
= gmap_fault(vcpu
->arch
.gmap
, arg
, 0);
3301 case KVM_ENABLE_CAP
:
3303 struct kvm_enable_cap cap
;
3305 if (copy_from_user(&cap
, argp
, sizeof(cap
)))
3307 r
= kvm_vcpu_ioctl_enable_cap(vcpu
, &cap
);
3310 case KVM_S390_MEM_OP
: {
3311 struct kvm_s390_mem_op mem_op
;
3313 if (copy_from_user(&mem_op
, argp
, sizeof(mem_op
)) == 0)
3314 r
= kvm_s390_guest_mem_op(vcpu
, &mem_op
);
3319 case KVM_S390_SET_IRQ_STATE
: {
3320 struct kvm_s390_irq_state irq_state
;
3323 if (copy_from_user(&irq_state
, argp
, sizeof(irq_state
)))
3325 if (irq_state
.len
> VCPU_IRQS_MAX_BUF
||
3326 irq_state
.len
== 0 ||
3327 irq_state
.len
% sizeof(struct kvm_s390_irq
) > 0) {
3331 r
= kvm_s390_set_irq_state(vcpu
,
3332 (void __user
*) irq_state
.buf
,
3336 case KVM_S390_GET_IRQ_STATE
: {
3337 struct kvm_s390_irq_state irq_state
;
3340 if (copy_from_user(&irq_state
, argp
, sizeof(irq_state
)))
3342 if (irq_state
.len
== 0) {
3346 r
= kvm_s390_get_irq_state(vcpu
,
3347 (__u8 __user
*) irq_state
.buf
,
3357 int kvm_arch_vcpu_fault(struct kvm_vcpu
*vcpu
, struct vm_fault
*vmf
)
3359 #ifdef CONFIG_KVM_S390_UCONTROL
3360 if ((vmf
->pgoff
== KVM_S390_SIE_PAGE_OFFSET
)
3361 && (kvm_is_ucontrol(vcpu
->kvm
))) {
3362 vmf
->page
= virt_to_page(vcpu
->arch
.sie_block
);
3363 get_page(vmf
->page
);
3367 return VM_FAULT_SIGBUS
;
/* No per-slot arch data needed on s390. */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
3376 /* Section: memory related */
3377 int kvm_arch_prepare_memory_region(struct kvm
*kvm
,
3378 struct kvm_memory_slot
*memslot
,
3379 const struct kvm_userspace_memory_region
*mem
,
3380 enum kvm_mr_change change
)
3382 /* A few sanity checks. We can have memory slots which have to be
3383 located/ended at a segment boundary (1MB). The memory in userland is
3384 ok to be fragmented into various different vmas. It is okay to mmap()
3385 and munmap() stuff in this slot after doing this call at any time */
3387 if (mem
->userspace_addr
& 0xffffful
)
3390 if (mem
->memory_size
& 0xffffful
)
3393 if (mem
->guest_phys_addr
+ mem
->memory_size
> kvm
->arch
.mem_limit
)
3399 void kvm_arch_commit_memory_region(struct kvm
*kvm
,
3400 const struct kvm_userspace_memory_region
*mem
,
3401 const struct kvm_memory_slot
*old
,
3402 const struct kvm_memory_slot
*new,
3403 enum kvm_mr_change change
)
3407 /* If the basics of the memslot do not change, we do not want
3408 * to update the gmap. Every update causes several unnecessary
3409 * segment translation exceptions. This is usually handled just
3410 * fine by the normal fault handler + gmap, but it will also
3411 * cause faults on the prefix page of running guest CPUs.
3413 if (old
->userspace_addr
== mem
->userspace_addr
&&
3414 old
->base_gfn
* PAGE_SIZE
== mem
->guest_phys_addr
&&
3415 old
->npages
* PAGE_SIZE
== mem
->memory_size
)
3418 rc
= gmap_map_segment(kvm
->arch
.gmap
, mem
->userspace_addr
,
3419 mem
->guest_phys_addr
, mem
->memory_size
);
3421 pr_warn("failed to commit memory region\n");
3425 static inline unsigned long nonhyp_mask(int i
)
3427 unsigned int nonhyp_fai
= (sclp
.hmfai
<< i
* 2) >> 30;
3429 return 0x0000ffffffffffffUL
>> (nonhyp_fai
<< 4);
3432 void kvm_arch_vcpu_block_finish(struct kvm_vcpu
*vcpu
)
3434 vcpu
->valid_wakeup
= false;
3437 static int __init
kvm_s390_init(void)
3441 if (!sclp
.has_sief2
) {
3442 pr_info("SIE not available\n");
3446 for (i
= 0; i
< 16; i
++)
3447 kvm_s390_fac_list_mask
[i
] |=
3448 S390_lowcore
.stfle_fac_list
[i
] & nonhyp_mask(i
);
3450 return kvm_init(NULL
, sizeof(struct kvm_vcpu
), 0, THIS_MODULE
);
3453 static void __exit
kvm_s390_exit(void)
3458 module_init(kvm_s390_init
);
3459 module_exit(kvm_s390_exit
);
3462 * Enable autoloading of the kvm module.
3463 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3464 * since x86 takes a different approach.
3466 #include <linux/miscdevice.h>
3467 MODULE_ALIAS_MISCDEV(KVM_MINOR
);
3468 MODULE_ALIAS("devname:kvm");