arch/s390/kvm/kvm-s390.c
1 /*
2 * hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008, 2009
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
15 */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include "kvm-s390.h"
47 #include "gaccess.h"
48
49 #define KMSG_COMPONENT "kvm-s390"
50 #undef pr_fmt
51 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
52
53 #define CREATE_TRACE_POINTS
54 #include "trace.h"
55 #include "trace-s390.h"
56
57 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
58 #define LOCAL_IRQS 32
59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
60 (KVM_MAX_VCPUS + LOCAL_IRQS))
61
62 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
63
64 struct kvm_stats_debugfs_item debugfs_entries[] = {
65 { "userspace_handled", VCPU_STAT(exit_userspace) },
66 { "exit_null", VCPU_STAT(exit_null) },
67 { "exit_validity", VCPU_STAT(exit_validity) },
68 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
69 { "exit_external_request", VCPU_STAT(exit_external_request) },
70 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
71 { "exit_instruction", VCPU_STAT(exit_instruction) },
72 { "exit_pei", VCPU_STAT(exit_pei) },
73 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
74 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
75 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
76 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
77 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
78 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
79 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
80 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
82 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
83 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
84 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
85 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
86 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
87 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
88 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
89 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
90 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
91 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
92 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
93 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
94 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
95 { "instruction_spx", VCPU_STAT(instruction_spx) },
96 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
97 { "instruction_stap", VCPU_STAT(instruction_stap) },
98 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
99 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
100 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
101 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
102 { "instruction_essa", VCPU_STAT(instruction_essa) },
103 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
104 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
105 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
106 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
107 { "instruction_sie", VCPU_STAT(instruction_sie) },
108 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
109 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
110 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
111 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
112 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
113 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
114 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
115 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
116 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
117 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
118 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
119 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
120 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
121 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
122 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
123 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
124 { "diagnose_10", VCPU_STAT(diagnose_10) },
125 { "diagnose_44", VCPU_STAT(diagnose_44) },
126 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
127 { "diagnose_258", VCPU_STAT(diagnose_258) },
128 { "diagnose_308", VCPU_STAT(diagnose_308) },
129 { "diagnose_500", VCPU_STAT(diagnose_500) },
130 { NULL }
131 };
132
133 /* allow nested virtualization in KVM (if enabled by user space) */
134 static int nested;
135 module_param(nested, int, S_IRUGO);
136 MODULE_PARM_DESC(nested, "Nested virtualization support");
137
138 /* upper facilities limit for kvm */
139 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
140
141 unsigned long kvm_s390_fac_list_mask_size(void)
142 {
143 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
144 return ARRAY_SIZE(kvm_s390_fac_list_mask);
145 }
146
147 /* available cpu features supported by kvm */
148 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
149 /* available subfunctions indicated via query / "test bit" */
150 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
151
152 static struct gmap_notifier gmap_notifier;
153 static struct gmap_notifier vsie_gmap_notifier;
154 debug_info_t *kvm_s390_dbf;
155
156 /* Section: not file related */
157 int kvm_arch_hardware_enable(void)
158 {
159 /* every s390 is virtualization enabled ;-) */
160 return 0;
161 }
162
163 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
164 unsigned long end);
165
166 /*
167 * This callback is executed during stop_machine(). All CPUs are therefore
168 * temporarily stopped. In order not to change guest behavior, we have to
169 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
170 * so a CPU won't be stopped while calculating with the epoch.
171 */
172 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
173 void *v)
174 {
175 struct kvm *kvm;
176 struct kvm_vcpu *vcpu;
177 int i;
178 unsigned long long *delta = v;
179
180 list_for_each_entry(kvm, &vm_list, vm_list) {
181 kvm->arch.epoch -= *delta;
182 kvm_for_each_vcpu(i, vcpu, kvm) {
183 vcpu->arch.sie_block->epoch -= *delta;
184 if (vcpu->arch.cputm_enabled)
185 vcpu->arch.cputm_start += *delta;
186 if (vcpu->arch.vsie_block)
187 vcpu->arch.vsie_block->epoch -= *delta;
188 }
189 }
190 return NOTIFY_OK;
191 }
192
193 static struct notifier_block kvm_clock_notifier = {
194 .notifier_call = kvm_clock_sync,
195 };
196
197 int kvm_arch_hardware_setup(void)
198 {
199 gmap_notifier.notifier_call = kvm_gmap_notifier;
200 gmap_register_pte_notifier(&gmap_notifier);
201 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
202 gmap_register_pte_notifier(&vsie_gmap_notifier);
203 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
204 &kvm_clock_notifier);
205 return 0;
206 }
207
208 void kvm_arch_hardware_unsetup(void)
209 {
210 gmap_unregister_pte_notifier(&gmap_notifier);
211 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
212 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
213 &kvm_clock_notifier);
214 }
215
216 static void allow_cpu_feat(unsigned long nr)
217 {
218 set_bit_inv(nr, kvm_s390_available_cpu_feat);
219 }
220
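/*
 * Editorial note (not in the original source): test whether a single PLO
 * (PERFORM LOCKED OPERATION) function code is installed.  Bit 0x100 in
 * general register 0 selects the "test bit" form, so only the condition
 * code is set (cc == 0 means the function is available) and no locked
 * operation is actually performed.
 */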
221 static inline int plo_test_bit(unsigned char nr)
222 {
223 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
224 int cc;
225
226 asm volatile(
227 /* Parameter registers are ignored for "test bit" */
228 " plo 0,0,0,0(0)\n"
229 " ipm %0\n"
230 " srl %0,28\n"
231 : "=d" (cc)
232 : "d" (r0)
233 : "cc");
234 return cc == 0;
235 }
236
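/*
 * Editorial note (not in the original source): probe the host for the
 * subfunctions (PLO, PTFF and the CPACF crypto queries) and the
 * SCLP-reported SIE features that KVM can offer to guests, recording the
 * results in kvm_s390_available_subfunc and kvm_s390_available_cpu_feat.
 * The SIE features needed for nested (vSIE) support are only advertised
 * when the "nested" module parameter is set.
 */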
237 static void kvm_s390_cpu_feat_init(void)
238 {
239 int i;
240
241 for (i = 0; i < 256; ++i) {
242 if (plo_test_bit(i))
243 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
244 }
245
246 if (test_facility(28)) /* TOD-clock steering */
247 ptff(kvm_s390_available_subfunc.ptff,
248 sizeof(kvm_s390_available_subfunc.ptff),
249 PTFF_QAF);
250
251 if (test_facility(17)) { /* MSA */
252 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
253 kvm_s390_available_subfunc.kmac);
254 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
255 kvm_s390_available_subfunc.kmc);
256 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
257 kvm_s390_available_subfunc.km);
258 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
259 kvm_s390_available_subfunc.kimd);
260 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
261 kvm_s390_available_subfunc.klmd);
262 }
263 if (test_facility(76)) /* MSA3 */
264 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
265 kvm_s390_available_subfunc.pckmo);
266 if (test_facility(77)) { /* MSA4 */
267 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
268 kvm_s390_available_subfunc.kmctr);
269 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
270 kvm_s390_available_subfunc.kmf);
271 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
272 kvm_s390_available_subfunc.kmo);
273 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
274 kvm_s390_available_subfunc.pcc);
275 }
276 if (test_facility(57)) /* MSA5 */
277 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
278 kvm_s390_available_subfunc.ppno);
279
280 if (test_facility(146)) /* MSA8 */
281 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
282 kvm_s390_available_subfunc.kma);
283
284 if (MACHINE_HAS_ESOP)
285 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
286 /*
287 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
288 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
289 */
290 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
291 !test_facility(3) || !nested)
292 return;
293 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
294 if (sclp.has_64bscao)
295 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
296 if (sclp.has_siif)
297 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
298 if (sclp.has_gpere)
299 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
300 if (sclp.has_gsls)
301 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
302 if (sclp.has_ib)
303 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
304 if (sclp.has_cei)
305 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
306 if (sclp.has_ibs)
307 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
308 if (sclp.has_kss)
309 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
310 /*
311 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
312 * all skey handling functions read/set the skey from the PGSTE
313 * instead of the real storage key.
314 *
315 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
316 * pages being detected as preserved although they are resident.
317 *
318 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
319 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
320 *
321 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
322 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
323 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
324 *
325 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
326 * cannot easily shadow the SCA because of the ipte lock.
327 */
328 }
329
330 int kvm_arch_init(void *opaque)
331 {
332 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
333 if (!kvm_s390_dbf)
334 return -ENOMEM;
335
336 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
337 debug_unregister(kvm_s390_dbf);
338 return -ENOMEM;
339 }
340
341 kvm_s390_cpu_feat_init();
342
343 /* Register floating interrupt controller interface. */
344 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
345 }
346
347 void kvm_arch_exit(void)
348 {
349 debug_unregister(kvm_s390_dbf);
350 }
351
352 /* Section: device related */
353 long kvm_arch_dev_ioctl(struct file *filp,
354 unsigned int ioctl, unsigned long arg)
355 {
356 if (ioctl == KVM_S390_ENABLE_SIE)
357 return s390_enable_sie();
358 return -EINVAL;
359 }
360
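/*
 * Editorial note (not in the original source): report which optional
 * capabilities are available.  Boolean capabilities return 0 or 1;
 * others (e.g. KVM_CAP_S390_MEM_OP or the vcpu/memslot limits) return
 * the supported value directly.
 */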
361 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
362 {
363 int r;
364
365 switch (ext) {
366 case KVM_CAP_S390_PSW:
367 case KVM_CAP_S390_GMAP:
368 case KVM_CAP_SYNC_MMU:
369 #ifdef CONFIG_KVM_S390_UCONTROL
370 case KVM_CAP_S390_UCONTROL:
371 #endif
372 case KVM_CAP_ASYNC_PF:
373 case KVM_CAP_SYNC_REGS:
374 case KVM_CAP_ONE_REG:
375 case KVM_CAP_ENABLE_CAP:
376 case KVM_CAP_S390_CSS_SUPPORT:
377 case KVM_CAP_IOEVENTFD:
378 case KVM_CAP_DEVICE_CTRL:
379 case KVM_CAP_ENABLE_CAP_VM:
380 case KVM_CAP_S390_IRQCHIP:
381 case KVM_CAP_VM_ATTRIBUTES:
382 case KVM_CAP_MP_STATE:
383 case KVM_CAP_IMMEDIATE_EXIT:
384 case KVM_CAP_S390_INJECT_IRQ:
385 case KVM_CAP_S390_USER_SIGP:
386 case KVM_CAP_S390_USER_STSI:
387 case KVM_CAP_S390_SKEYS:
388 case KVM_CAP_S390_IRQ_STATE:
389 case KVM_CAP_S390_USER_INSTR0:
390 case KVM_CAP_S390_CMMA_MIGRATION:
391 case KVM_CAP_S390_AIS:
392 r = 1;
393 break;
394 case KVM_CAP_S390_MEM_OP:
395 r = MEM_OP_MAX_SIZE;
396 break;
397 case KVM_CAP_NR_VCPUS:
398 case KVM_CAP_MAX_VCPUS:
399 r = KVM_S390_BSCA_CPU_SLOTS;
400 if (!kvm_s390_use_sca_entries())
401 r = KVM_MAX_VCPUS;
402 else if (sclp.has_esca && sclp.has_64bscao)
403 r = KVM_S390_ESCA_CPU_SLOTS;
404 break;
405 case KVM_CAP_NR_MEMSLOTS:
406 r = KVM_USER_MEM_SLOTS;
407 break;
408 case KVM_CAP_S390_COW:
409 r = MACHINE_HAS_ESOP;
410 break;
411 case KVM_CAP_S390_VECTOR_REGISTERS:
412 r = MACHINE_HAS_VX;
413 break;
414 case KVM_CAP_S390_RI:
415 r = test_facility(64);
416 break;
417 case KVM_CAP_S390_GS:
418 r = test_facility(133);
419 break;
420 default:
421 r = 0;
422 }
423 return r;
424 }
425
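/*
 * Editorial note (not in the original source): transfer the dirty state
 * of all pages in the memslot from the guest mapping into the memslot's
 * dirty bitmap, clearing the per-page dirty indication in the process.
 */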
426 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
427 struct kvm_memory_slot *memslot)
428 {
429 gfn_t cur_gfn, last_gfn;
430 unsigned long address;
431 struct gmap *gmap = kvm->arch.gmap;
432
433 /* Loop over all guest pages */
434 last_gfn = memslot->base_gfn + memslot->npages;
435 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
436 address = gfn_to_hva_memslot(memslot, cur_gfn);
437
438 if (test_and_clear_guest_dirty(gmap->mm, address))
439 mark_page_dirty(kvm, cur_gfn);
440 if (fatal_signal_pending(current))
441 return;
442 cond_resched();
443 }
444 }
445
446 /* Section: vm related */
447 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
448
449 /*
450 * Get (and clear) the dirty memory log for a memory slot.
451 */
452 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
453 struct kvm_dirty_log *log)
454 {
455 int r;
456 unsigned long n;
457 struct kvm_memslots *slots;
458 struct kvm_memory_slot *memslot;
459 int is_dirty = 0;
460
461 if (kvm_is_ucontrol(kvm))
462 return -EINVAL;
463
464 mutex_lock(&kvm->slots_lock);
465
466 r = -EINVAL;
467 if (log->slot >= KVM_USER_MEM_SLOTS)
468 goto out;
469
470 slots = kvm_memslots(kvm);
471 memslot = id_to_memslot(slots, log->slot);
472 r = -ENOENT;
473 if (!memslot->dirty_bitmap)
474 goto out;
475
476 kvm_s390_sync_dirty_log(kvm, memslot);
477 r = kvm_get_dirty_log(kvm, log, &is_dirty);
478 if (r)
479 goto out;
480
481 /* Clear the dirty log */
482 if (is_dirty) {
483 n = kvm_dirty_bitmap_bytes(memslot);
484 memset(memslot->dirty_bitmap, 0, n);
485 }
486 r = 0;
487 out:
488 mutex_unlock(&kvm->slots_lock);
489 return r;
490 }
491
492 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
493 {
494 unsigned int i;
495 struct kvm_vcpu *vcpu;
496
497 kvm_for_each_vcpu(i, vcpu, kvm) {
498 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
499 }
500 }
501
502 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
503 {
504 int r;
505
506 if (cap->flags)
507 return -EINVAL;
508
509 switch (cap->cap) {
510 case KVM_CAP_S390_IRQCHIP:
511 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
512 kvm->arch.use_irqchip = 1;
513 r = 0;
514 break;
515 case KVM_CAP_S390_USER_SIGP:
516 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
517 kvm->arch.user_sigp = 1;
518 r = 0;
519 break;
520 case KVM_CAP_S390_VECTOR_REGISTERS:
521 mutex_lock(&kvm->lock);
522 if (kvm->created_vcpus) {
523 r = -EBUSY;
524 } else if (MACHINE_HAS_VX) {
525 set_kvm_facility(kvm->arch.model.fac_mask, 129);
526 set_kvm_facility(kvm->arch.model.fac_list, 129);
527 if (test_facility(134)) {
528 set_kvm_facility(kvm->arch.model.fac_mask, 134);
529 set_kvm_facility(kvm->arch.model.fac_list, 134);
530 }
531 if (test_facility(135)) {
532 set_kvm_facility(kvm->arch.model.fac_mask, 135);
533 set_kvm_facility(kvm->arch.model.fac_list, 135);
534 }
535 r = 0;
536 } else
537 r = -EINVAL;
538 mutex_unlock(&kvm->lock);
539 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
540 r ? "(not available)" : "(success)");
541 break;
542 case KVM_CAP_S390_RI:
543 r = -EINVAL;
544 mutex_lock(&kvm->lock);
545 if (kvm->created_vcpus) {
546 r = -EBUSY;
547 } else if (test_facility(64)) {
548 set_kvm_facility(kvm->arch.model.fac_mask, 64);
549 set_kvm_facility(kvm->arch.model.fac_list, 64);
550 r = 0;
551 }
552 mutex_unlock(&kvm->lock);
553 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
554 r ? "(not available)" : "(success)");
555 break;
556 case KVM_CAP_S390_AIS:
557 mutex_lock(&kvm->lock);
558 if (kvm->created_vcpus) {
559 r = -EBUSY;
560 } else {
561 set_kvm_facility(kvm->arch.model.fac_mask, 72);
562 set_kvm_facility(kvm->arch.model.fac_list, 72);
563 r = 0;
564 }
565 mutex_unlock(&kvm->lock);
566 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
567 r ? "(not available)" : "(success)");
568 break;
569 case KVM_CAP_S390_GS:
570 r = -EINVAL;
571 mutex_lock(&kvm->lock);
572 if (atomic_read(&kvm->online_vcpus)) {
573 r = -EBUSY;
574 } else if (test_facility(133)) {
575 set_kvm_facility(kvm->arch.model.fac_mask, 133);
576 set_kvm_facility(kvm->arch.model.fac_list, 133);
577 r = 0;
578 }
579 mutex_unlock(&kvm->lock);
580 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
581 r ? "(not available)" : "(success)");
582 break;
583 case KVM_CAP_S390_USER_STSI:
584 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
585 kvm->arch.user_stsi = 1;
586 r = 0;
587 break;
588 case KVM_CAP_S390_USER_INSTR0:
589 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
590 kvm->arch.user_instr0 = 1;
591 icpt_operexc_on_all_vcpus(kvm);
592 r = 0;
593 break;
594 default:
595 r = -EINVAL;
596 break;
597 }
598 return r;
599 }
600
601 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
602 {
603 int ret;
604
605 switch (attr->attr) {
606 case KVM_S390_VM_MEM_LIMIT_SIZE:
607 ret = 0;
608 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
609 kvm->arch.mem_limit);
610 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
611 ret = -EFAULT;
612 break;
613 default:
614 ret = -ENXIO;
615 break;
616 }
617 return ret;
618 }
619
620 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
621 {
622 int ret;
623 unsigned int idx;
624 switch (attr->attr) {
625 case KVM_S390_VM_MEM_ENABLE_CMMA:
626 ret = -ENXIO;
627 if (!sclp.has_cmma)
628 break;
629
630 ret = -EBUSY;
631 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
632 mutex_lock(&kvm->lock);
633 if (!kvm->created_vcpus) {
634 kvm->arch.use_cmma = 1;
635 ret = 0;
636 }
637 mutex_unlock(&kvm->lock);
638 break;
639 case KVM_S390_VM_MEM_CLR_CMMA:
640 ret = -ENXIO;
641 if (!sclp.has_cmma)
642 break;
643 ret = -EINVAL;
644 if (!kvm->arch.use_cmma)
645 break;
646
647 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
648 mutex_lock(&kvm->lock);
649 idx = srcu_read_lock(&kvm->srcu);
650 s390_reset_cmma(kvm->arch.gmap->mm);
651 srcu_read_unlock(&kvm->srcu, idx);
652 mutex_unlock(&kvm->lock);
653 ret = 0;
654 break;
655 case KVM_S390_VM_MEM_LIMIT_SIZE: {
656 unsigned long new_limit;
657
658 if (kvm_is_ucontrol(kvm))
659 return -EINVAL;
660
661 if (get_user(new_limit, (u64 __user *)attr->addr))
662 return -EFAULT;
663
664 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
665 new_limit > kvm->arch.mem_limit)
666 return -E2BIG;
667
668 if (!new_limit)
669 return -EINVAL;
670
671 /* gmap_create takes last usable address */
672 if (new_limit != KVM_S390_NO_MEM_LIMIT)
673 new_limit -= 1;
674
675 ret = -EBUSY;
676 mutex_lock(&kvm->lock);
677 if (!kvm->created_vcpus) {
678 /* gmap_create will round the limit up */
679 struct gmap *new = gmap_create(current->mm, new_limit);
680
681 if (!new) {
682 ret = -ENOMEM;
683 } else {
684 gmap_remove(kvm->arch.gmap);
685 new->private = kvm;
686 kvm->arch.gmap = new;
687 ret = 0;
688 }
689 }
690 mutex_unlock(&kvm->lock);
691 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
692 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
693 (void *) kvm->arch.gmap->asce);
694 break;
695 }
696 default:
697 ret = -ENXIO;
698 break;
699 }
700 return ret;
701 }
702
703 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
704
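/*
 * Editorial note (not in the original source): enable or disable AES/DEA
 * protected-key wrapping for the VM.  Enabling generates fresh random
 * wrapping key masks, disabling clears them.  All vcpus are then
 * reconfigured and kicked out of SIE so the new crypto control block
 * contents take effect.
 */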
705 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
706 {
707 struct kvm_vcpu *vcpu;
708 int i;
709
710 if (!test_kvm_facility(kvm, 76))
711 return -EINVAL;
712
713 mutex_lock(&kvm->lock);
714 switch (attr->attr) {
715 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
716 get_random_bytes(
717 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
718 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
719 kvm->arch.crypto.aes_kw = 1;
720 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
721 break;
722 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
723 get_random_bytes(
724 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
725 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
726 kvm->arch.crypto.dea_kw = 1;
727 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
728 break;
729 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
730 kvm->arch.crypto.aes_kw = 0;
731 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
732 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
733 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
734 break;
735 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
736 kvm->arch.crypto.dea_kw = 0;
737 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
738 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
739 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
740 break;
741 default:
742 mutex_unlock(&kvm->lock);
743 return -ENXIO;
744 }
745
746 kvm_for_each_vcpu(i, vcpu, kvm) {
747 kvm_s390_vcpu_crypto_setup(vcpu);
748 exit_sie(vcpu);
749 }
750 mutex_unlock(&kvm->lock);
751 return 0;
752 }
753
754 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
755 {
756 int cx;
757 struct kvm_vcpu *vcpu;
758
759 kvm_for_each_vcpu(cx, vcpu, kvm)
760 kvm_s390_sync_request(req, vcpu);
761 }
762
763 /*
764 * Must be called with kvm->srcu held to avoid races on memslots, and with
765 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
766 */
767 static int kvm_s390_vm_start_migration(struct kvm *kvm)
768 {
769 struct kvm_s390_migration_state *mgs;
770 struct kvm_memory_slot *ms;
771 /* should be the only one */
772 struct kvm_memslots *slots;
773 unsigned long ram_pages;
774 int slotnr;
775
776 /* migration mode already enabled */
777 if (kvm->arch.migration_state)
778 return 0;
779
780 slots = kvm_memslots(kvm);
781 if (!slots || !slots->used_slots)
782 return -EINVAL;
783
784 mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
785 if (!mgs)
786 return -ENOMEM;
787 kvm->arch.migration_state = mgs;
788
789 if (kvm->arch.use_cmma) {
790 /*
791 * Get the last slot. They should be sorted by base_gfn, so the
792 * last slot is also the one at the end of the address space.
793 * We have verified above that at least one slot is present.
794 */
795 ms = slots->memslots + slots->used_slots - 1;
796 /* round up so we only use full longs */
797 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
798 /* allocate enough bytes to store all the bits */
799 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
800 if (!mgs->pgste_bitmap) {
801 kfree(mgs);
802 kvm->arch.migration_state = NULL;
803 return -ENOMEM;
804 }
805
806 mgs->bitmap_size = ram_pages;
807 atomic64_set(&mgs->dirty_pages, ram_pages);
808 /* mark all the pages in active slots as dirty */
809 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
810 ms = slots->memslots + slotnr;
811 bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
812 }
813
814 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
815 }
816 return 0;
817 }
818
819 /*
820 * Must be called with kvm->lock to avoid races with ourselves and
821 * kvm_s390_vm_start_migration.
822 */
823 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
824 {
825 struct kvm_s390_migration_state *mgs;
826
827 /* migration mode already disabled */
828 if (!kvm->arch.migration_state)
829 return 0;
830 mgs = kvm->arch.migration_state;
831 kvm->arch.migration_state = NULL;
832
833 if (kvm->arch.use_cmma) {
834 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
835 vfree(mgs->pgste_bitmap);
836 }
837 kfree(mgs);
838 return 0;
839 }
840
841 static int kvm_s390_vm_set_migration(struct kvm *kvm,
842 struct kvm_device_attr *attr)
843 {
844 int idx, res = -ENXIO;
845
846 mutex_lock(&kvm->lock);
847 switch (attr->attr) {
848 case KVM_S390_VM_MIGRATION_START:
849 idx = srcu_read_lock(&kvm->srcu);
850 res = kvm_s390_vm_start_migration(kvm);
851 srcu_read_unlock(&kvm->srcu, idx);
852 break;
853 case KVM_S390_VM_MIGRATION_STOP:
854 res = kvm_s390_vm_stop_migration(kvm);
855 break;
856 default:
857 break;
858 }
859 mutex_unlock(&kvm->lock);
860
861 return res;
862 }
863
864 static int kvm_s390_vm_get_migration(struct kvm *kvm,
865 struct kvm_device_attr *attr)
866 {
867 u64 mig = (kvm->arch.migration_state != NULL);
868
869 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
870 return -ENXIO;
871
872 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
873 return -EFAULT;
874 return 0;
875 }
876
877 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
878 {
879 u8 gtod_high;
880
881 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
882 sizeof(gtod_high)))
883 return -EFAULT;
884
885 if (gtod_high != 0)
886 return -EINVAL;
887 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
888
889 return 0;
890 }
891
892 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
893 {
894 u64 gtod;
895
896 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
897 return -EFAULT;
898
899 kvm_s390_set_tod_clock(kvm, gtod);
900 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
901 return 0;
902 }
903
904 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
905 {
906 int ret;
907
908 if (attr->flags)
909 return -EINVAL;
910
911 switch (attr->attr) {
912 case KVM_S390_VM_TOD_HIGH:
913 ret = kvm_s390_set_tod_high(kvm, attr);
914 break;
915 case KVM_S390_VM_TOD_LOW:
916 ret = kvm_s390_set_tod_low(kvm, attr);
917 break;
918 default:
919 ret = -ENXIO;
920 break;
921 }
922 return ret;
923 }
924
925 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
926 {
927 u8 gtod_high = 0;
928
929 if (copy_to_user((void __user *)attr->addr, &gtod_high,
930 sizeof(gtod_high)))
931 return -EFAULT;
932 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
933
934 return 0;
935 }
936
937 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
938 {
939 u64 gtod;
940
941 gtod = kvm_s390_get_tod_clock_fast(kvm);
942 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
943 return -EFAULT;
944 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
945
946 return 0;
947 }
948
949 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
950 {
951 int ret;
952
953 if (attr->flags)
954 return -EINVAL;
955
956 switch (attr->attr) {
957 case KVM_S390_VM_TOD_HIGH:
958 ret = kvm_s390_get_tod_high(kvm, attr);
959 break;
960 case KVM_S390_VM_TOD_LOW:
961 ret = kvm_s390_get_tod_low(kvm, attr);
962 break;
963 default:
964 ret = -ENXIO;
965 break;
966 }
967 return ret;
968 }
969
970 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
971 {
972 struct kvm_s390_vm_cpu_processor *proc;
973 u16 lowest_ibc, unblocked_ibc;
974 int ret = 0;
975
976 mutex_lock(&kvm->lock);
977 if (kvm->created_vcpus) {
978 ret = -EBUSY;
979 goto out;
980 }
981 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
982 if (!proc) {
983 ret = -ENOMEM;
984 goto out;
985 }
986 if (!copy_from_user(proc, (void __user *)attr->addr,
987 sizeof(*proc))) {
988 kvm->arch.model.cpuid = proc->cpuid;
989 lowest_ibc = sclp.ibc >> 16 & 0xfff;
990 unblocked_ibc = sclp.ibc & 0xfff;
991 if (lowest_ibc && proc->ibc) {
992 if (proc->ibc > unblocked_ibc)
993 kvm->arch.model.ibc = unblocked_ibc;
994 else if (proc->ibc < lowest_ibc)
995 kvm->arch.model.ibc = lowest_ibc;
996 else
997 kvm->arch.model.ibc = proc->ibc;
998 }
999 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1000 S390_ARCH_FAC_LIST_SIZE_BYTE);
1001 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1002 kvm->arch.model.ibc,
1003 kvm->arch.model.cpuid);
1004 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1005 kvm->arch.model.fac_list[0],
1006 kvm->arch.model.fac_list[1],
1007 kvm->arch.model.fac_list[2]);
1008 } else
1009 ret = -EFAULT;
1010 kfree(proc);
1011 out:
1012 mutex_unlock(&kvm->lock);
1013 return ret;
1014 }
1015
1016 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1017 struct kvm_device_attr *attr)
1018 {
1019 struct kvm_s390_vm_cpu_feat data;
1020 int ret = -EBUSY;
1021
1022 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1023 return -EFAULT;
1024 if (!bitmap_subset((unsigned long *) data.feat,
1025 kvm_s390_available_cpu_feat,
1026 KVM_S390_VM_CPU_FEAT_NR_BITS))
1027 return -EINVAL;
1028
1029 mutex_lock(&kvm->lock);
1030 if (!atomic_read(&kvm->online_vcpus)) {
1031 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1032 KVM_S390_VM_CPU_FEAT_NR_BITS);
1033 ret = 0;
1034 }
1035 mutex_unlock(&kvm->lock);
1036 return ret;
1037 }
1038
1039 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1040 struct kvm_device_attr *attr)
1041 {
1042 /*
1043 * Once supported by kernel + hw, we have to store the subfunctions
1044 * in kvm->arch and remember that user space configured them.
1045 */
1046 return -ENXIO;
1047 }
1048
1049 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1050 {
1051 int ret = -ENXIO;
1052
1053 switch (attr->attr) {
1054 case KVM_S390_VM_CPU_PROCESSOR:
1055 ret = kvm_s390_set_processor(kvm, attr);
1056 break;
1057 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1058 ret = kvm_s390_set_processor_feat(kvm, attr);
1059 break;
1060 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1061 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1062 break;
1063 }
1064 return ret;
1065 }
1066
1067 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1068 {
1069 struct kvm_s390_vm_cpu_processor *proc;
1070 int ret = 0;
1071
1072 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1073 if (!proc) {
1074 ret = -ENOMEM;
1075 goto out;
1076 }
1077 proc->cpuid = kvm->arch.model.cpuid;
1078 proc->ibc = kvm->arch.model.ibc;
1079 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1080 S390_ARCH_FAC_LIST_SIZE_BYTE);
1081 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1082 kvm->arch.model.ibc,
1083 kvm->arch.model.cpuid);
1084 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1085 kvm->arch.model.fac_list[0],
1086 kvm->arch.model.fac_list[1],
1087 kvm->arch.model.fac_list[2]);
1088 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1089 ret = -EFAULT;
1090 kfree(proc);
1091 out:
1092 return ret;
1093 }
1094
1095 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1096 {
1097 struct kvm_s390_vm_cpu_machine *mach;
1098 int ret = 0;
1099
1100 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1101 if (!mach) {
1102 ret = -ENOMEM;
1103 goto out;
1104 }
1105 get_cpu_id((struct cpuid *) &mach->cpuid);
1106 mach->ibc = sclp.ibc;
1107 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1108 S390_ARCH_FAC_LIST_SIZE_BYTE);
1109 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1110 sizeof(S390_lowcore.stfle_fac_list));
1111 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1112 kvm->arch.model.ibc,
1113 kvm->arch.model.cpuid);
1114 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1115 mach->fac_mask[0],
1116 mach->fac_mask[1],
1117 mach->fac_mask[2]);
1118 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1119 mach->fac_list[0],
1120 mach->fac_list[1],
1121 mach->fac_list[2]);
1122 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1123 ret = -EFAULT;
1124 kfree(mach);
1125 out:
1126 return ret;
1127 }
1128
1129 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1130 struct kvm_device_attr *attr)
1131 {
1132 struct kvm_s390_vm_cpu_feat data;
1133
1134 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1135 KVM_S390_VM_CPU_FEAT_NR_BITS);
1136 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1137 return -EFAULT;
1138 return 0;
1139 }
1140
1141 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1142 struct kvm_device_attr *attr)
1143 {
1144 struct kvm_s390_vm_cpu_feat data;
1145
1146 bitmap_copy((unsigned long *) data.feat,
1147 kvm_s390_available_cpu_feat,
1148 KVM_S390_VM_CPU_FEAT_NR_BITS);
1149 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1150 return -EFAULT;
1151 return 0;
1152 }
1153
1154 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1155 struct kvm_device_attr *attr)
1156 {
1157 /*
1158 * Once we can actually configure subfunctions (kernel + hw support),
1159 * we have to check if they were already set by user space, if so copy
1160 * them from kvm->arch.
1161 */
1162 return -ENXIO;
1163 }
1164
1165 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1166 struct kvm_device_attr *attr)
1167 {
1168 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1169 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1170 return -EFAULT;
1171 return 0;
1172 }
1173 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1174 {
1175 int ret = -ENXIO;
1176
1177 switch (attr->attr) {
1178 case KVM_S390_VM_CPU_PROCESSOR:
1179 ret = kvm_s390_get_processor(kvm, attr);
1180 break;
1181 case KVM_S390_VM_CPU_MACHINE:
1182 ret = kvm_s390_get_machine(kvm, attr);
1183 break;
1184 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1185 ret = kvm_s390_get_processor_feat(kvm, attr);
1186 break;
1187 case KVM_S390_VM_CPU_MACHINE_FEAT:
1188 ret = kvm_s390_get_machine_feat(kvm, attr);
1189 break;
1190 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1191 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1192 break;
1193 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1194 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1195 break;
1196 }
1197 return ret;
1198 }
1199
1200 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1201 {
1202 int ret;
1203
1204 switch (attr->group) {
1205 case KVM_S390_VM_MEM_CTRL:
1206 ret = kvm_s390_set_mem_control(kvm, attr);
1207 break;
1208 case KVM_S390_VM_TOD:
1209 ret = kvm_s390_set_tod(kvm, attr);
1210 break;
1211 case KVM_S390_VM_CPU_MODEL:
1212 ret = kvm_s390_set_cpu_model(kvm, attr);
1213 break;
1214 case KVM_S390_VM_CRYPTO:
1215 ret = kvm_s390_vm_set_crypto(kvm, attr);
1216 break;
1217 case KVM_S390_VM_MIGRATION:
1218 ret = kvm_s390_vm_set_migration(kvm, attr);
1219 break;
1220 default:
1221 ret = -ENXIO;
1222 break;
1223 }
1224
1225 return ret;
1226 }
1227
1228 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1229 {
1230 int ret;
1231
1232 switch (attr->group) {
1233 case KVM_S390_VM_MEM_CTRL:
1234 ret = kvm_s390_get_mem_control(kvm, attr);
1235 break;
1236 case KVM_S390_VM_TOD:
1237 ret = kvm_s390_get_tod(kvm, attr);
1238 break;
1239 case KVM_S390_VM_CPU_MODEL:
1240 ret = kvm_s390_get_cpu_model(kvm, attr);
1241 break;
1242 case KVM_S390_VM_MIGRATION:
1243 ret = kvm_s390_vm_get_migration(kvm, attr);
1244 break;
1245 default:
1246 ret = -ENXIO;
1247 break;
1248 }
1249
1250 return ret;
1251 }
1252
1253 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1254 {
1255 int ret;
1256
1257 switch (attr->group) {
1258 case KVM_S390_VM_MEM_CTRL:
1259 switch (attr->attr) {
1260 case KVM_S390_VM_MEM_ENABLE_CMMA:
1261 case KVM_S390_VM_MEM_CLR_CMMA:
1262 ret = sclp.has_cmma ? 0 : -ENXIO;
1263 break;
1264 case KVM_S390_VM_MEM_LIMIT_SIZE:
1265 ret = 0;
1266 break;
1267 default:
1268 ret = -ENXIO;
1269 break;
1270 }
1271 break;
1272 case KVM_S390_VM_TOD:
1273 switch (attr->attr) {
1274 case KVM_S390_VM_TOD_LOW:
1275 case KVM_S390_VM_TOD_HIGH:
1276 ret = 0;
1277 break;
1278 default:
1279 ret = -ENXIO;
1280 break;
1281 }
1282 break;
1283 case KVM_S390_VM_CPU_MODEL:
1284 switch (attr->attr) {
1285 case KVM_S390_VM_CPU_PROCESSOR:
1286 case KVM_S390_VM_CPU_MACHINE:
1287 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1288 case KVM_S390_VM_CPU_MACHINE_FEAT:
1289 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1290 ret = 0;
1291 break;
1292 /* configuring subfunctions is not supported yet */
1293 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1294 default:
1295 ret = -ENXIO;
1296 break;
1297 }
1298 break;
1299 case KVM_S390_VM_CRYPTO:
1300 switch (attr->attr) {
1301 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1302 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1303 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1304 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1305 ret = 0;
1306 break;
1307 default:
1308 ret = -ENXIO;
1309 break;
1310 }
1311 break;
1312 case KVM_S390_VM_MIGRATION:
1313 ret = 0;
1314 break;
1315 default:
1316 ret = -ENXIO;
1317 break;
1318 }
1319
1320 return ret;
1321 }
1322
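/*
 * Editorial note (not in the original source): copy the guest storage
 * keys for a range of guest frames to user space.  If the guest has
 * never made use of storage keys, KVM_S390_GET_SKEYS_NONE is returned
 * instead.
 */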
1323 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1324 {
1325 uint8_t *keys;
1326 uint64_t hva;
1327 int srcu_idx, i, r = 0;
1328
1329 if (args->flags != 0)
1330 return -EINVAL;
1331
1332 /* Is this guest using storage keys? */
1333 if (!mm_use_skey(current->mm))
1334 return KVM_S390_GET_SKEYS_NONE;
1335
1336 /* Enforce sane limit on memory allocation */
1337 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1338 return -EINVAL;
1339
1340 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1341 if (!keys)
1342 return -ENOMEM;
1343
1344 down_read(&current->mm->mmap_sem);
1345 srcu_idx = srcu_read_lock(&kvm->srcu);
1346 for (i = 0; i < args->count; i++) {
1347 hva = gfn_to_hva(kvm, args->start_gfn + i);
1348 if (kvm_is_error_hva(hva)) {
1349 r = -EFAULT;
1350 break;
1351 }
1352
1353 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1354 if (r)
1355 break;
1356 }
1357 srcu_read_unlock(&kvm->srcu, srcu_idx);
1358 up_read(&current->mm->mmap_sem);
1359
1360 if (!r) {
1361 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1362 sizeof(uint8_t) * args->count);
1363 if (r)
1364 r = -EFAULT;
1365 }
1366
1367 kvfree(keys);
1368 return r;
1369 }
1370
1371 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1372 {
1373 uint8_t *keys;
1374 uint64_t hva;
1375 int srcu_idx, i, r = 0;
1376
1377 if (args->flags != 0)
1378 return -EINVAL;
1379
1380 /* Enforce sane limit on memory allocation */
1381 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1382 return -EINVAL;
1383
1384 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1385 if (!keys)
1386 return -ENOMEM;
1387
1388 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1389 sizeof(uint8_t) * args->count);
1390 if (r) {
1391 r = -EFAULT;
1392 goto out;
1393 }
1394
1395 /* Enable storage key handling for the guest */
1396 r = s390_enable_skey();
1397 if (r)
1398 goto out;
1399
1400 down_read(&current->mm->mmap_sem);
1401 srcu_idx = srcu_read_lock(&kvm->srcu);
1402 for (i = 0; i < args->count; i++) {
1403 hva = gfn_to_hva(kvm, args->start_gfn + i);
1404 if (kvm_is_error_hva(hva)) {
1405 r = -EFAULT;
1406 break;
1407 }
1408
1409 /* Lowest order bit is reserved */
1410 if (keys[i] & 0x01) {
1411 r = -EINVAL;
1412 break;
1413 }
1414
1415 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1416 if (r)
1417 break;
1418 }
1419 srcu_read_unlock(&kvm->srcu, srcu_idx);
1420 up_read(&current->mm->mmap_sem);
1421 out:
1422 kvfree(keys);
1423 return r;
1424 }
1425
1426 /*
1427 * Base address and length must be sent at the start of each block, therefore
1428 * it's cheaper to send some clean data, as long as it's less than the size of
1429 * two longs.
1430 */
1431 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1432 /* for consistency */
1433 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1434
1435 /*
1436 * This function searches for the next page with dirty CMMA attributes, and
1437 * saves the attributes in the buffer up to either the end of the buffer or
1438 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1439 * no trailing clean bytes are saved.
1440 * In case no dirty bits were found, or if CMMA was not enabled or used, the
1441 * output buffer will indicate 0 as length.
1442 */
1443 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1444 struct kvm_s390_cmma_log *args)
1445 {
1446 struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1447 unsigned long bufsize, hva, pgstev, i, next, cur;
1448 int srcu_idx, peek, r = 0, rr;
1449 u8 *res;
1450
1451 cur = args->start_gfn;
1452 i = next = pgstev = 0;
1453
1454 if (unlikely(!kvm->arch.use_cmma))
1455 return -ENXIO;
1456 /* Invalid/unsupported flags were specified */
1457 if (args->flags & ~KVM_S390_CMMA_PEEK)
1458 return -EINVAL;
1459 /* Migration mode query, and we are not doing a migration */
1460 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1461 if (!peek && !s)
1462 return -EINVAL;
1463 /* CMMA is disabled or was not used, or the buffer has length zero */
1464 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1465 if (!bufsize || !kvm->mm->context.use_cmma) {
1466 memset(args, 0, sizeof(*args));
1467 return 0;
1468 }
1469
1470 if (!peek) {
1471 /* We are not peeking, and there are no dirty pages */
1472 if (!atomic64_read(&s->dirty_pages)) {
1473 memset(args, 0, sizeof(*args));
1474 return 0;
1475 }
1476 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1477 args->start_gfn);
1478 if (cur >= s->bitmap_size) /* nothing found, loop back */
1479 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1480 if (cur >= s->bitmap_size) { /* again! (very unlikely) */
1481 memset(args, 0, sizeof(*args));
1482 return 0;
1483 }
1484 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1485 }
1486
1487 res = vmalloc(bufsize);
1488 if (!res)
1489 return -ENOMEM;
1490
1491 args->start_gfn = cur;
1492
1493 down_read(&kvm->mm->mmap_sem);
1494 srcu_idx = srcu_read_lock(&kvm->srcu);
1495 while (i < bufsize) {
1496 hva = gfn_to_hva(kvm, cur);
1497 if (kvm_is_error_hva(hva)) {
1498 r = -EFAULT;
1499 break;
1500 }
1501 /* decrement only if we actually flipped the bit to 0 */
1502 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1503 atomic64_dec(&s->dirty_pages);
1504 r = get_pgste(kvm->mm, hva, &pgstev);
1505 if (r < 0)
1506 pgstev = 0;
1507 /* save the value */
1508 res[i++] = (pgstev >> 24) & 0x3;
1509 /*
1510 * if the next bit is too far away, stop.
1511 * if we reached the previous "next", find the next one
1512 */
1513 if (!peek) {
1514 if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1515 break;
1516 if (cur == next)
1517 next = find_next_bit(s->pgste_bitmap,
1518 s->bitmap_size, cur + 1);
1519 /* reached the end of the bitmap or of the buffer, stop */
1520 if ((next >= s->bitmap_size) ||
1521 (next >= args->start_gfn + bufsize))
1522 break;
1523 }
1524 cur++;
1525 }
1526 srcu_read_unlock(&kvm->srcu, srcu_idx);
1527 up_read(&kvm->mm->mmap_sem);
1528 args->count = i;
1529 args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1530
1531 rr = copy_to_user((void __user *)args->values, res, args->count);
1532 if (rr)
1533 r = -EFAULT;
1534
1535 vfree(res);
1536 return r;
1537 }
1538
1539 /*
1540 * This function sets the CMMA attributes for the given pages. If the input
1541 * buffer has zero length, no action is taken, otherwise the attributes are
1542 * set and the mm->context.use_cmma flag is set.
1543 */
1544 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1545 const struct kvm_s390_cmma_log *args)
1546 {
1547 unsigned long hva, mask, pgstev, i;
1548 uint8_t *bits;
1549 int srcu_idx, r = 0;
1550
1551 mask = args->mask;
1552
1553 if (!kvm->arch.use_cmma)
1554 return -ENXIO;
1555 /* invalid/unsupported flags */
1556 if (args->flags != 0)
1557 return -EINVAL;
1558 /* Enforce sane limit on memory allocation */
1559 if (args->count > KVM_S390_CMMA_SIZE_MAX)
1560 return -EINVAL;
1561 /* Nothing to do */
1562 if (args->count == 0)
1563 return 0;
1564
1565 bits = vmalloc(sizeof(*bits) * args->count);
1566 if (!bits)
1567 return -ENOMEM;
1568
1569 r = copy_from_user(bits, (void __user *)args->values, args->count);
1570 if (r) {
1571 r = -EFAULT;
1572 goto out;
1573 }
1574
1575 down_read(&kvm->mm->mmap_sem);
1576 srcu_idx = srcu_read_lock(&kvm->srcu);
1577 for (i = 0; i < args->count; i++) {
1578 hva = gfn_to_hva(kvm, args->start_gfn + i);
1579 if (kvm_is_error_hva(hva)) {
1580 r = -EFAULT;
1581 break;
1582 }
1583
1584 pgstev = bits[i];
1585 pgstev = pgstev << 24;
1586 mask &= _PGSTE_GPS_USAGE_MASK;
1587 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1588 }
1589 srcu_read_unlock(&kvm->srcu, srcu_idx);
1590 up_read(&kvm->mm->mmap_sem);
1591
1592 if (!kvm->mm->context.use_cmma) {
1593 down_write(&kvm->mm->mmap_sem);
1594 kvm->mm->context.use_cmma = 1;
1595 up_write(&kvm->mm->mmap_sem);
1596 }
1597 out:
1598 vfree(bits);
1599 return r;
1600 }
1601
1602 long kvm_arch_vm_ioctl(struct file *filp,
1603 unsigned int ioctl, unsigned long arg)
1604 {
1605 struct kvm *kvm = filp->private_data;
1606 void __user *argp = (void __user *)arg;
1607 struct kvm_device_attr attr;
1608 int r;
1609
1610 switch (ioctl) {
1611 case KVM_S390_INTERRUPT: {
1612 struct kvm_s390_interrupt s390int;
1613
1614 r = -EFAULT;
1615 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1616 break;
1617 r = kvm_s390_inject_vm(kvm, &s390int);
1618 break;
1619 }
1620 case KVM_ENABLE_CAP: {
1621 struct kvm_enable_cap cap;
1622 r = -EFAULT;
1623 if (copy_from_user(&cap, argp, sizeof(cap)))
1624 break;
1625 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1626 break;
1627 }
1628 case KVM_CREATE_IRQCHIP: {
1629 struct kvm_irq_routing_entry routing;
1630
1631 r = -EINVAL;
1632 if (kvm->arch.use_irqchip) {
1633 /* Set up dummy routing. */
1634 memset(&routing, 0, sizeof(routing));
1635 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1636 }
1637 break;
1638 }
1639 case KVM_SET_DEVICE_ATTR: {
1640 r = -EFAULT;
1641 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1642 break;
1643 r = kvm_s390_vm_set_attr(kvm, &attr);
1644 break;
1645 }
1646 case KVM_GET_DEVICE_ATTR: {
1647 r = -EFAULT;
1648 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1649 break;
1650 r = kvm_s390_vm_get_attr(kvm, &attr);
1651 break;
1652 }
1653 case KVM_HAS_DEVICE_ATTR: {
1654 r = -EFAULT;
1655 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1656 break;
1657 r = kvm_s390_vm_has_attr(kvm, &attr);
1658 break;
1659 }
1660 case KVM_S390_GET_SKEYS: {
1661 struct kvm_s390_skeys args;
1662
1663 r = -EFAULT;
1664 if (copy_from_user(&args, argp,
1665 sizeof(struct kvm_s390_skeys)))
1666 break;
1667 r = kvm_s390_get_skeys(kvm, &args);
1668 break;
1669 }
1670 case KVM_S390_SET_SKEYS: {
1671 struct kvm_s390_skeys args;
1672
1673 r = -EFAULT;
1674 if (copy_from_user(&args, argp,
1675 sizeof(struct kvm_s390_skeys)))
1676 break;
1677 r = kvm_s390_set_skeys(kvm, &args);
1678 break;
1679 }
1680 case KVM_S390_GET_CMMA_BITS: {
1681 struct kvm_s390_cmma_log args;
1682
1683 r = -EFAULT;
1684 if (copy_from_user(&args, argp, sizeof(args)))
1685 break;
1686 r = kvm_s390_get_cmma_bits(kvm, &args);
1687 if (!r) {
1688 r = copy_to_user(argp, &args, sizeof(args));
1689 if (r)
1690 r = -EFAULT;
1691 }
1692 break;
1693 }
1694 case KVM_S390_SET_CMMA_BITS: {
1695 struct kvm_s390_cmma_log args;
1696
1697 r = -EFAULT;
1698 if (copy_from_user(&args, argp, sizeof(args)))
1699 break;
1700 r = kvm_s390_set_cmma_bits(kvm, &args);
1701 break;
1702 }
1703 default:
1704 r = -ENOTTY;
1705 }
1706
1707 return r;
1708 }
1709
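/*
 * Editorial note (not in the original source): execute PQAP(QCI) to
 * retrieve the AP (Adjunct Processor) configuration information into the
 * 128-byte buffer at @config.  Returns the condition code of the
 * instruction, 0 on success.
 */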
1710 static int kvm_s390_query_ap_config(u8 *config)
1711 {
1712 u32 fcn_code = 0x04000000UL;
1713 u32 cc = 0;
1714
1715 memset(config, 0, 128);
1716 asm volatile(
1717 "lgr 0,%1\n"
1718 "lgr 2,%2\n"
1719 ".long 0xb2af0000\n" /* PQAP(QCI) */
1720 "0: ipm %0\n"
1721 "srl %0,28\n"
1722 "1:\n"
1723 EX_TABLE(0b, 1b)
1724 : "+r" (cc)
1725 : "r" (fcn_code), "r" (config)
1726 : "cc", "0", "2", "memory"
1727 );
1728
1729 return cc;
1730 }
1731
1732 static int kvm_s390_apxa_installed(void)
1733 {
1734 u8 config[128];
1735 int cc;
1736
1737 if (test_facility(12)) {
1738 cc = kvm_s390_query_ap_config(config);
1739
1740 if (cc)
1741 pr_err("PQAP(QCI) failed with cc=%d", cc);
1742 else
1743 return config[0] & 0x40;
1744 }
1745
1746 return 0;
1747 }
1748
1749 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1750 {
1751 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1752
1753 if (kvm_s390_apxa_installed())
1754 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1755 else
1756 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1757 }
1758
1759 static u64 kvm_s390_get_initial_cpuid(void)
1760 {
1761 struct cpuid cpuid;
1762
1763 get_cpu_id(&cpuid);
1764 cpuid.version = 0xff;
1765 return *((u64 *) &cpuid);
1766 }
1767
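/*
 * Editorial note (not in the original source): set up the guest's crypto
 * control block: choose the CRYCB format depending on whether APXA is
 * installed and, by default, enable the AES and DEA protected-key
 * functions with freshly generated random wrapping keys.
 */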
1768 static void kvm_s390_crypto_init(struct kvm *kvm)
1769 {
1770 if (!test_kvm_facility(kvm, 76))
1771 return;
1772
1773 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1774 kvm_s390_set_crycb_format(kvm);
1775
1776 /* Enable AES/DEA protected key functions by default */
1777 kvm->arch.crypto.aes_kw = 1;
1778 kvm->arch.crypto.dea_kw = 1;
1779 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1780 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1781 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1782 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1783 }
1784
1785 static void sca_dispose(struct kvm *kvm)
1786 {
1787 if (kvm->arch.use_esca)
1788 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1789 else
1790 free_page((unsigned long)(kvm->arch.sca));
1791 kvm->arch.sca = NULL;
1792 }
1793
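/*
 * Editorial note (not in the original source): allocate and initialize
 * the architecture specific parts of a new VM: the basic SCA (placed at
 * a rotating offset within its page), the per-VM debug feature, the
 * facility mask/list in sie_page2, the crypto control block, the
 * floating interrupt state and - unless this is a ucontrol VM - the
 * guest address space (gmap).
 */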
1794 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1795 {
1796 gfp_t alloc_flags = GFP_KERNEL;
1797 int i, rc;
1798 char debug_name[16];
1799 static unsigned long sca_offset;
1800
1801 rc = -EINVAL;
1802 #ifdef CONFIG_KVM_S390_UCONTROL
1803 if (type & ~KVM_VM_S390_UCONTROL)
1804 goto out_err;
1805 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1806 goto out_err;
1807 #else
1808 if (type)
1809 goto out_err;
1810 #endif
1811
1812 rc = s390_enable_sie();
1813 if (rc)
1814 goto out_err;
1815
1816 rc = -ENOMEM;
1817
1818 ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1819
1820 kvm->arch.use_esca = 0; /* start with basic SCA */
1821 if (!sclp.has_64bscao)
1822 alloc_flags |= GFP_DMA;
1823 rwlock_init(&kvm->arch.sca_lock);
1824 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1825 if (!kvm->arch.sca)
1826 goto out_err;
1827 spin_lock(&kvm_lock);
1828 sca_offset += 16;
1829 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1830 sca_offset = 0;
1831 kvm->arch.sca = (struct bsca_block *)
1832 ((char *) kvm->arch.sca + sca_offset);
1833 spin_unlock(&kvm_lock);
1834
1835 sprintf(debug_name, "kvm-%u", current->pid);
1836
1837 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1838 if (!kvm->arch.dbf)
1839 goto out_err;
1840
1841 kvm->arch.sie_page2 =
1842 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1843 if (!kvm->arch.sie_page2)
1844 goto out_err;
1845
1846 /* Populate the facility mask initially. */
1847 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1848 sizeof(S390_lowcore.stfle_fac_list));
1849 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1850 if (i < kvm_s390_fac_list_mask_size())
1851 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1852 else
1853 kvm->arch.model.fac_mask[i] = 0UL;
1854 }
1855
1856 /* Populate the facility list initially. */
1857 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1858 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1859 S390_ARCH_FAC_LIST_SIZE_BYTE);
1860
1861 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1862 set_kvm_facility(kvm->arch.model.fac_list, 74);
1863
1864 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1865 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1866
1867 kvm_s390_crypto_init(kvm);
1868
1869 mutex_init(&kvm->arch.float_int.ais_lock);
1870 kvm->arch.float_int.simm = 0;
1871 kvm->arch.float_int.nimm = 0;
1872 spin_lock_init(&kvm->arch.float_int.lock);
1873 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1874 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1875 init_waitqueue_head(&kvm->arch.ipte_wq);
1876 mutex_init(&kvm->arch.ipte_mutex);
1877
1878 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1879 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1880
1881 if (type & KVM_VM_S390_UCONTROL) {
1882 kvm->arch.gmap = NULL;
1883 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1884 } else {
1885 if (sclp.hamax == U64_MAX)
1886 kvm->arch.mem_limit = TASK_SIZE_MAX;
1887 else
1888 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1889 sclp.hamax + 1);
1890 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1891 if (!kvm->arch.gmap)
1892 goto out_err;
1893 kvm->arch.gmap->private = kvm;
1894 kvm->arch.gmap->pfault_enabled = 0;
1895 }
1896
1897 kvm->arch.css_support = 0;
1898 kvm->arch.use_irqchip = 0;
1899 kvm->arch.epoch = 0;
1900
1901 spin_lock_init(&kvm->arch.start_stop_lock);
1902 kvm_s390_vsie_init(kvm);
1903 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1904
1905 return 0;
1906 out_err:
1907 free_page((unsigned long)kvm->arch.sie_page2);
1908 debug_unregister(kvm->arch.dbf);
1909 sca_dispose(kvm);
1910 KVM_EVENT(3, "creation of vm failed: %d", rc);
1911 return rc;
1912 }
1913
1914 bool kvm_arch_has_vcpu_debugfs(void)
1915 {
1916 return false;
1917 }
1918
1919 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1920 {
1921 return 0;
1922 }
1923
1924 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1925 {
1926 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1927 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1928 kvm_s390_clear_local_irqs(vcpu);
1929 kvm_clear_async_pf_completion_queue(vcpu);
1930 if (!kvm_is_ucontrol(vcpu->kvm))
1931 sca_del_vcpu(vcpu);
1932
1933 if (kvm_is_ucontrol(vcpu->kvm))
1934 gmap_remove(vcpu->arch.gmap);
1935
1936 if (vcpu->kvm->arch.use_cmma)
1937 kvm_s390_vcpu_unsetup_cmma(vcpu);
1938 free_page((unsigned long)(vcpu->arch.sie_block));
1939
1940 kvm_vcpu_uninit(vcpu);
1941 kmem_cache_free(kvm_vcpu_cache, vcpu);
1942 }
1943
1944 static void kvm_free_vcpus(struct kvm *kvm)
1945 {
1946 unsigned int i;
1947 struct kvm_vcpu *vcpu;
1948
1949 kvm_for_each_vcpu(i, vcpu, kvm)
1950 kvm_arch_vcpu_destroy(vcpu);
1951
1952 mutex_lock(&kvm->lock);
1953 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1954 kvm->vcpus[i] = NULL;
1955
1956 atomic_set(&kvm->online_vcpus, 0);
1957 mutex_unlock(&kvm->lock);
1958 }
1959
1960 void kvm_arch_destroy_vm(struct kvm *kvm)
1961 {
1962 kvm_free_vcpus(kvm);
1963 sca_dispose(kvm);
1964 debug_unregister(kvm->arch.dbf);
1965 free_page((unsigned long)kvm->arch.sie_page2);
1966 if (!kvm_is_ucontrol(kvm))
1967 gmap_remove(kvm->arch.gmap);
1968 kvm_s390_destroy_adapters(kvm);
1969 kvm_s390_clear_float_irqs(kvm);
1970 kvm_s390_vsie_destroy(kvm);
1971 if (kvm->arch.migration_state) {
1972 vfree(kvm->arch.migration_state->pgste_bitmap);
1973 kfree(kvm->arch.migration_state);
1974 }
1975 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1976 }
1977
1978 /* Section: vcpu related */
1979 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1980 {
1981 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1982 if (!vcpu->arch.gmap)
1983 return -ENOMEM;
1984 vcpu->arch.gmap->private = vcpu->kvm;
1985
1986 return 0;
1987 }
1988
1989 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1990 {
1991 if (!kvm_s390_use_sca_entries())
1992 return;
1993 read_lock(&vcpu->kvm->arch.sca_lock);
1994 if (vcpu->kvm->arch.use_esca) {
1995 struct esca_block *sca = vcpu->kvm->arch.sca;
1996
1997 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1998 sca->cpu[vcpu->vcpu_id].sda = 0;
1999 } else {
2000 struct bsca_block *sca = vcpu->kvm->arch.sca;
2001
2002 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2003 sca->cpu[vcpu->vcpu_id].sda = 0;
2004 }
2005 read_unlock(&vcpu->kvm->arch.sca_lock);
2006 }
2007
2008 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2009 {
2010 if (!kvm_s390_use_sca_entries()) {
2011 struct bsca_block *sca = vcpu->kvm->arch.sca;
2012
2013 /* we still need the basic sca for the ipte control */
2014 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2015 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2016 }
2017 read_lock(&vcpu->kvm->arch.sca_lock);
2018 if (vcpu->kvm->arch.use_esca) {
2019 struct esca_block *sca = vcpu->kvm->arch.sca;
2020
2021 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2022 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2023 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2024 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2025 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2026 } else {
2027 struct bsca_block *sca = vcpu->kvm->arch.sca;
2028
2029 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2030 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2031 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2032 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2033 }
2034 read_unlock(&vcpu->kvm->arch.sca_lock);
2035 }
2036
2037 /* Basic SCA to Extended SCA data copy routines */
2038 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2039 {
2040 d->sda = s->sda;
2041 d->sigp_ctrl.c = s->sigp_ctrl.c;
2042 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2043 }
2044
2045 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2046 {
2047 int i;
2048
2049 d->ipte_control = s->ipte_control;
2050 d->mcn[0] = s->mcn;
2051 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2052 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2053 }
2054
2055 static int sca_switch_to_extended(struct kvm *kvm)
2056 {
2057 struct bsca_block *old_sca = kvm->arch.sca;
2058 struct esca_block *new_sca;
2059 struct kvm_vcpu *vcpu;
2060 unsigned int vcpu_idx;
2061 u32 scaol, scaoh;
2062
2063 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2064 if (!new_sca)
2065 return -ENOMEM;
2066
2067 scaoh = (u32)((u64)(new_sca) >> 32);
2068 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2069
2070 kvm_s390_vcpu_block_all(kvm);
2071 write_lock(&kvm->arch.sca_lock);
2072
2073 sca_copy_b_to_e(new_sca, old_sca);
2074
2075 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2076 vcpu->arch.sie_block->scaoh = scaoh;
2077 vcpu->arch.sie_block->scaol = scaol;
2078 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2079 }
2080 kvm->arch.sca = new_sca;
2081 kvm->arch.use_esca = 1;
2082
2083 write_unlock(&kvm->arch.sca_lock);
2084 kvm_s390_vcpu_unblock_all(kvm);
2085
2086 free_page((unsigned long)old_sca);
2087
2088 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2089 old_sca, kvm->arch.sca);
2090 return 0;
2091 }
2092
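/*
 * Overview of the SCA handling: a VM starts with a basic SCA
 * (bsca_block, KVM_S390_BSCA_CPU_SLOTS entries).  sca_can_add_vcpu()
 * below triggers sca_switch_to_extended() once a VCPU id beyond the
 * basic slots is requested and the machine offers ESCA together with
 * the 64-bit SCA origin (sclp.has_esca && sclp.has_64bscao).  The
 * switch blocks all VCPUs, copies the entries, updates scaoh/scaol and
 * ECB2_ESCA in every SIE block and frees the old basic SCA page.
 */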
2093 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2094 {
2095 int rc;
2096
2097 if (!kvm_s390_use_sca_entries()) {
2098 if (id < KVM_MAX_VCPUS)
2099 return true;
2100 return false;
2101 }
2102 if (id < KVM_S390_BSCA_CPU_SLOTS)
2103 return true;
2104 if (!sclp.has_esca || !sclp.has_64bscao)
2105 return false;
2106
2107 mutex_lock(&kvm->lock);
2108 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2109 mutex_unlock(&kvm->lock);
2110
2111 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2112 }
2113
2114 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2115 {
2116 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2117 kvm_clear_async_pf_completion_queue(vcpu);
2118 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2119 KVM_SYNC_GPRS |
2120 KVM_SYNC_ACRS |
2121 KVM_SYNC_CRS |
2122 KVM_SYNC_ARCH0 |
2123 KVM_SYNC_PFAULT;
2124 kvm_s390_set_prefix(vcpu, 0);
2125 if (test_kvm_facility(vcpu->kvm, 64))
2126 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2127 if (test_kvm_facility(vcpu->kvm, 133))
2128 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2129 /* fprs can be synchronized via vrs, even if the guest has no vx. With
2130 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2131 */
2132 if (MACHINE_HAS_VX)
2133 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2134 else
2135 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2136
2137 if (kvm_is_ucontrol(vcpu->kvm))
2138 return __kvm_ucontrol_vcpu_init(vcpu);
2139
2140 return 0;
2141 }
2142
2143 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2144 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2145 {
2146 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2147 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2148 vcpu->arch.cputm_start = get_tod_clock_fast();
2149 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2150 }
2151
2152 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2153 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2154 {
2155 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2156 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2157 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2158 vcpu->arch.cputm_start = 0;
2159 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2160 }
2161
2162 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2163 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2164 {
2165 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2166 vcpu->arch.cputm_enabled = true;
2167 __start_cpu_timer_accounting(vcpu);
2168 }
2169
2170 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2171 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2172 {
2173 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2174 __stop_cpu_timer_accounting(vcpu);
2175 vcpu->arch.cputm_enabled = false;
2176 }
2177
2178 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2179 {
2180 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2181 __enable_cpu_timer_accounting(vcpu);
2182 preempt_enable();
2183 }
2184
2185 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2186 {
2187 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2188 __disable_cpu_timer_accounting(vcpu);
2189 preempt_enable();
2190 }
2191
2192 /* set the cpu timer - may only be called from the VCPU thread itself */
2193 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2194 {
2195 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2196 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2197 if (vcpu->arch.cputm_enabled)
2198 vcpu->arch.cputm_start = get_tod_clock_fast();
2199 vcpu->arch.sie_block->cputm = cputm;
2200 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2201 preempt_enable();
2202 }
2203
2204 /* update and get the cpu timer - can also be called from other VCPU threads */
2205 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2206 {
2207 unsigned int seq;
2208 __u64 value;
2209
2210 if (unlikely(!vcpu->arch.cputm_enabled))
2211 return vcpu->arch.sie_block->cputm;
2212
2213 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2214 do {
2215 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2216 /*
2217 * If the writer would ever execute a read in the critical
2218 * section, e.g. in irq context, we have a deadlock.
2219 */
2220 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2221 value = vcpu->arch.sie_block->cputm;
2222 /* if cputm_start is 0, accounting is being started/stopped */
2223 if (likely(vcpu->arch.cputm_start))
2224 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2225 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2226 preempt_enable();
2227 return value;
2228 }
2229
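/*
 * The CPU timer accounting above is protected by a seqcount so that
 * other threads can read a consistent value without locking.  While
 * accounting is running, the effective guest CPU timer is
 *
 *	cputm - (get_tod_clock_fast() - cputm_start)
 *
 * i.e. the stored value minus the TOD delta accumulated since the
 * accounting was (re)started; kvm_s390_get_cpu_timer() retries the
 * calculation if it races with a writer.
 */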
2230 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2231 {
2232
2233 gmap_enable(vcpu->arch.enabled_gmap);
2234 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2235 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2236 __start_cpu_timer_accounting(vcpu);
2237 vcpu->cpu = cpu;
2238 }
2239
2240 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2241 {
2242 vcpu->cpu = -1;
2243 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2244 __stop_cpu_timer_accounting(vcpu);
2245 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2246 vcpu->arch.enabled_gmap = gmap_get_enabled();
2247 gmap_disable(vcpu->arch.enabled_gmap);
2248
2249 }
2250
2251 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2252 {
2253 /* this equals initial cpu reset in pop, but we don't switch to ESA */
2254 vcpu->arch.sie_block->gpsw.mask = 0UL;
2255 vcpu->arch.sie_block->gpsw.addr = 0UL;
2256 kvm_s390_set_prefix(vcpu, 0);
2257 kvm_s390_set_cpu_timer(vcpu, 0);
2258 vcpu->arch.sie_block->ckc = 0UL;
2259 vcpu->arch.sie_block->todpr = 0;
2260 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2261 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
2262 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2263 /* make sure the new fpc will be lazily loaded */
2264 save_fpu_regs();
2265 current->thread.fpu.fpc = 0;
2266 vcpu->arch.sie_block->gbea = 1;
2267 vcpu->arch.sie_block->pp = 0;
2268 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2269 kvm_clear_async_pf_completion_queue(vcpu);
2270 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2271 kvm_s390_vcpu_stop(vcpu);
2272 kvm_s390_clear_local_irqs(vcpu);
2273 }
2274
2275 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2276 {
2277 mutex_lock(&vcpu->kvm->lock);
2278 preempt_disable();
2279 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2280 preempt_enable();
2281 mutex_unlock(&vcpu->kvm->lock);
2282 if (!kvm_is_ucontrol(vcpu->kvm)) {
2283 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2284 sca_add_vcpu(vcpu);
2285 }
2286 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2287 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2288 /* make vcpu_load load the right gmap on the first trigger */
2289 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2290 }
2291
2292 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2293 {
2294 if (!test_kvm_facility(vcpu->kvm, 76))
2295 return;
2296
2297 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2298
2299 if (vcpu->kvm->arch.crypto.aes_kw)
2300 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2301 if (vcpu->kvm->arch.crypto.dea_kw)
2302 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2303
2304 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2305 }
2306
2307 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2308 {
2309 free_page(vcpu->arch.sie_block->cbrlo);
2310 vcpu->arch.sie_block->cbrlo = 0;
2311 }
2312
2313 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2314 {
2315 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2316 if (!vcpu->arch.sie_block->cbrlo)
2317 return -ENOMEM;
2318
2319 vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2320 return 0;
2321 }
2322
2323 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2324 {
2325 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2326
2327 vcpu->arch.sie_block->ibc = model->ibc;
2328 if (test_kvm_facility(vcpu->kvm, 7))
2329 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2330 }
2331
2332 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2333 {
2334 int rc = 0;
2335
2336 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2337 CPUSTAT_SM |
2338 CPUSTAT_STOPPED);
2339
2340 if (test_kvm_facility(vcpu->kvm, 78))
2341 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2342 else if (test_kvm_facility(vcpu->kvm, 8))
2343 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2344
2345 kvm_s390_vcpu_setup_model(vcpu);
2346
2347 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2348 if (MACHINE_HAS_ESOP)
2349 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2350 if (test_kvm_facility(vcpu->kvm, 9))
2351 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2352 if (test_kvm_facility(vcpu->kvm, 73))
2353 vcpu->arch.sie_block->ecb |= ECB_TE;
2354
2355 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2356 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2357 if (test_kvm_facility(vcpu->kvm, 130))
2358 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2359 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2360 if (sclp.has_cei)
2361 vcpu->arch.sie_block->eca |= ECA_CEI;
2362 if (sclp.has_ib)
2363 vcpu->arch.sie_block->eca |= ECA_IB;
2364 if (sclp.has_siif)
2365 vcpu->arch.sie_block->eca |= ECA_SII;
2366 if (sclp.has_sigpif)
2367 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2368 if (test_kvm_facility(vcpu->kvm, 129)) {
2369 vcpu->arch.sie_block->eca |= ECA_VX;
2370 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2371 }
2372 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2373 | SDNXC;
2374 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2375
2376 if (sclp.has_kss)
2377 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2378 else
2379 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2380
2381 if (vcpu->kvm->arch.use_cmma) {
2382 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2383 if (rc)
2384 return rc;
2385 }
2386 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2387 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2388
2389 kvm_s390_vcpu_crypto_setup(vcpu);
2390
2391 return rc;
2392 }
2393
2394 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2395 unsigned int id)
2396 {
2397 struct kvm_vcpu *vcpu;
2398 struct sie_page *sie_page;
2399 int rc = -EINVAL;
2400
2401 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2402 goto out;
2403
2404 rc = -ENOMEM;
2405
2406 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2407 if (!vcpu)
2408 goto out;
2409
2410 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2411 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2412 if (!sie_page)
2413 goto out_free_cpu;
2414
2415 vcpu->arch.sie_block = &sie_page->sie_block;
2416 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2417
2418 /* the real guest size will always be smaller than msl */
2419 vcpu->arch.sie_block->mso = 0;
2420 vcpu->arch.sie_block->msl = sclp.hamax;
2421
2422 vcpu->arch.sie_block->icpua = id;
2423 spin_lock_init(&vcpu->arch.local_int.lock);
2424 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2425 vcpu->arch.local_int.wq = &vcpu->wq;
2426 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2427 seqcount_init(&vcpu->arch.cputm_seqcount);
2428
2429 rc = kvm_vcpu_init(vcpu, kvm, id);
2430 if (rc)
2431 goto out_free_sie_block;
2432 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2433 vcpu->arch.sie_block);
2434 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2435
2436 return vcpu;
2437 out_free_sie_block:
2438 free_page((unsigned long)(vcpu->arch.sie_block));
2439 out_free_cpu:
2440 kmem_cache_free(kvm_vcpu_cache, vcpu);
2441 out:
2442 return ERR_PTR(rc);
2443 }
2444
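/*
 * Each VCPU owns exactly one sie_page (enforced by the BUILD_BUG_ON
 * above), which contains its SIE control block and the itdb.  mso/msl
 * span the maximum possible guest; the real guest size is further
 * bounded by the memslots and kvm->arch.mem_limit.
 */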
2445 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2446 {
2447 return kvm_s390_vcpu_has_irq(vcpu, 0);
2448 }
2449
2450 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2451 {
2452 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2453 exit_sie(vcpu);
2454 }
2455
2456 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2457 {
2458 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2459 }
2460
2461 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2462 {
2463 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2464 exit_sie(vcpu);
2465 }
2466
2467 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2468 {
2469 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2470 }
2471
2472 /*
2473 * Kick a guest cpu out of SIE and wait until SIE is not running.
2474 * If the CPU is not running (e.g. waiting as idle) the function will
2475 * return immediately. */
2476 void exit_sie(struct kvm_vcpu *vcpu)
2477 {
2478 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2479 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2480 cpu_relax();
2481 }
2482
2483 /* Kick a guest cpu out of SIE to process a request synchronously */
2484 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2485 {
2486 kvm_make_request(req, vcpu);
2487 kvm_s390_vcpu_request(vcpu);
2488 }
2489
2490 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2491 unsigned long end)
2492 {
2493 struct kvm *kvm = gmap->private;
2494 struct kvm_vcpu *vcpu;
2495 unsigned long prefix;
2496 int i;
2497
2498 if (gmap_is_shadow(gmap))
2499 return;
2500 if (start >= 1UL << 31)
2501 /* We are only interested in prefix pages */
2502 return;
2503 kvm_for_each_vcpu(i, vcpu, kvm) {
2504 /* match against both prefix pages */
2505 prefix = kvm_s390_get_prefix(vcpu);
2506 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2507 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2508 start, end);
2509 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2510 }
2511 }
2512 }
2513
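/*
 * The gmap notifier above only acts on the two pages that form a VCPU's
 * prefix area.  When the host mapping of a prefix page changes, the
 * affected VCPU gets a KVM_REQ_MMU_RELOAD, and kvm_s390_handle_requests()
 * re-arms the ipte notifier by write-protecting the prefix pages again.
 */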
2514 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2515 {
2516 /* kvm common code refers to this, but never calls it */
2517 BUG();
2518 return 0;
2519 }
2520
2521 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2522 struct kvm_one_reg *reg)
2523 {
2524 int r = -EINVAL;
2525
2526 switch (reg->id) {
2527 case KVM_REG_S390_TODPR:
2528 r = put_user(vcpu->arch.sie_block->todpr,
2529 (u32 __user *)reg->addr);
2530 break;
2531 case KVM_REG_S390_EPOCHDIFF:
2532 r = put_user(vcpu->arch.sie_block->epoch,
2533 (u64 __user *)reg->addr);
2534 break;
2535 case KVM_REG_S390_CPU_TIMER:
2536 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2537 (u64 __user *)reg->addr);
2538 break;
2539 case KVM_REG_S390_CLOCK_COMP:
2540 r = put_user(vcpu->arch.sie_block->ckc,
2541 (u64 __user *)reg->addr);
2542 break;
2543 case KVM_REG_S390_PFTOKEN:
2544 r = put_user(vcpu->arch.pfault_token,
2545 (u64 __user *)reg->addr);
2546 break;
2547 case KVM_REG_S390_PFCOMPARE:
2548 r = put_user(vcpu->arch.pfault_compare,
2549 (u64 __user *)reg->addr);
2550 break;
2551 case KVM_REG_S390_PFSELECT:
2552 r = put_user(vcpu->arch.pfault_select,
2553 (u64 __user *)reg->addr);
2554 break;
2555 case KVM_REG_S390_PP:
2556 r = put_user(vcpu->arch.sie_block->pp,
2557 (u64 __user *)reg->addr);
2558 break;
2559 case KVM_REG_S390_GBEA:
2560 r = put_user(vcpu->arch.sie_block->gbea,
2561 (u64 __user *)reg->addr);
2562 break;
2563 default:
2564 break;
2565 }
2566
2567 return r;
2568 }
2569
2570 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2571 struct kvm_one_reg *reg)
2572 {
2573 int r = -EINVAL;
2574 __u64 val;
2575
2576 switch (reg->id) {
2577 case KVM_REG_S390_TODPR:
2578 r = get_user(vcpu->arch.sie_block->todpr,
2579 (u32 __user *)reg->addr);
2580 break;
2581 case KVM_REG_S390_EPOCHDIFF:
2582 r = get_user(vcpu->arch.sie_block->epoch,
2583 (u64 __user *)reg->addr);
2584 break;
2585 case KVM_REG_S390_CPU_TIMER:
2586 r = get_user(val, (u64 __user *)reg->addr);
2587 if (!r)
2588 kvm_s390_set_cpu_timer(vcpu, val);
2589 break;
2590 case KVM_REG_S390_CLOCK_COMP:
2591 r = get_user(vcpu->arch.sie_block->ckc,
2592 (u64 __user *)reg->addr);
2593 break;
2594 case KVM_REG_S390_PFTOKEN:
2595 r = get_user(vcpu->arch.pfault_token,
2596 (u64 __user *)reg->addr);
2597 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2598 kvm_clear_async_pf_completion_queue(vcpu);
2599 break;
2600 case KVM_REG_S390_PFCOMPARE:
2601 r = get_user(vcpu->arch.pfault_compare,
2602 (u64 __user *)reg->addr);
2603 break;
2604 case KVM_REG_S390_PFSELECT:
2605 r = get_user(vcpu->arch.pfault_select,
2606 (u64 __user *)reg->addr);
2607 break;
2608 case KVM_REG_S390_PP:
2609 r = get_user(vcpu->arch.sie_block->pp,
2610 (u64 __user *)reg->addr);
2611 break;
2612 case KVM_REG_S390_GBEA:
2613 r = get_user(vcpu->arch.sie_block->gbea,
2614 (u64 __user *)reg->addr);
2615 break;
2616 default:
2617 break;
2618 }
2619
2620 return r;
2621 }
2622
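/*
 * Example (a sketch, error handling omitted): userspace can read the
 * guest CPU timer through the ONE_REG interface handled above, given a
 * vcpu fd from KVM_CREATE_VCPU:
 *
 *	__u64 cputm;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64) &cputm,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */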
2623 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2624 {
2625 kvm_s390_vcpu_initial_reset(vcpu);
2626 return 0;
2627 }
2628
2629 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2630 {
2631 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2632 return 0;
2633 }
2634
2635 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2636 {
2637 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2638 return 0;
2639 }
2640
2641 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2642 struct kvm_sregs *sregs)
2643 {
2644 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2645 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2646 return 0;
2647 }
2648
2649 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2650 struct kvm_sregs *sregs)
2651 {
2652 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2653 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2654 return 0;
2655 }
2656
2657 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2658 {
2659 if (test_fp_ctl(fpu->fpc))
2660 return -EINVAL;
2661 vcpu->run->s.regs.fpc = fpu->fpc;
2662 if (MACHINE_HAS_VX)
2663 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2664 (freg_t *) fpu->fprs);
2665 else
2666 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2667 return 0;
2668 }
2669
2670 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2671 {
2672 /* make sure we have the latest values */
2673 save_fpu_regs();
2674 if (MACHINE_HAS_VX)
2675 convert_vx_to_fp((freg_t *) fpu->fprs,
2676 (__vector128 *) vcpu->run->s.regs.vrs);
2677 else
2678 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2679 fpu->fpc = vcpu->run->s.regs.fpc;
2680 return 0;
2681 }
2682
2683 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2684 {
2685 int rc = 0;
2686
2687 if (!is_vcpu_stopped(vcpu))
2688 rc = -EBUSY;
2689 else {
2690 vcpu->run->psw_mask = psw.mask;
2691 vcpu->run->psw_addr = psw.addr;
2692 }
2693 return rc;
2694 }
2695
2696 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2697 struct kvm_translation *tr)
2698 {
2699 return -EINVAL; /* not implemented yet */
2700 }
2701
2702 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2703 KVM_GUESTDBG_USE_HW_BP | \
2704 KVM_GUESTDBG_ENABLE)
2705
2706 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2707 struct kvm_guest_debug *dbg)
2708 {
2709 int rc = 0;
2710
2711 vcpu->guest_debug = 0;
2712 kvm_s390_clear_bp_data(vcpu);
2713
2714 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2715 return -EINVAL;
2716 if (!sclp.has_gpere)
2717 return -EINVAL;
2718
2719 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2720 vcpu->guest_debug = dbg->control;
2721 /* enforce guest PER */
2722 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2723
2724 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2725 rc = kvm_s390_import_bp_data(vcpu, dbg);
2726 } else {
2727 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2728 vcpu->arch.guestdbg.last_bp = 0;
2729 }
2730
2731 if (rc) {
2732 vcpu->guest_debug = 0;
2733 kvm_s390_clear_bp_data(vcpu);
2734 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2735 }
2736
2737 return rc;
2738 }
2739
2740 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2741 struct kvm_mp_state *mp_state)
2742 {
2743 /* CHECK_STOP and LOAD are not supported yet */
2744 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2745 KVM_MP_STATE_OPERATING;
2746 }
2747
2748 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2749 struct kvm_mp_state *mp_state)
2750 {
2751 int rc = 0;
2752
2753 /* user space knows about this interface - let it control the state */
2754 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2755
2756 switch (mp_state->mp_state) {
2757 case KVM_MP_STATE_STOPPED:
2758 kvm_s390_vcpu_stop(vcpu);
2759 break;
2760 case KVM_MP_STATE_OPERATING:
2761 kvm_s390_vcpu_start(vcpu);
2762 break;
2763 case KVM_MP_STATE_LOAD:
2764 case KVM_MP_STATE_CHECK_STOP:
2765 /* fall through - CHECK_STOP and LOAD are not supported yet */
2766 default:
2767 rc = -ENXIO;
2768 }
2769
2770 return rc;
2771 }
2772
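/*
 * Note on the MP state handlers above: the first KVM_SET_MP_STATE call
 * sets user_cpu_state_ctrl, after which the run ioctl no longer starts
 * stopped VCPUs automatically - userspace is then expected to manage
 * the STOPPED/OPERATING transitions itself.
 */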
2773 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2774 {
2775 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2776 }
2777
2778 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2779 {
2780 retry:
2781 kvm_s390_vcpu_request_handled(vcpu);
2782 if (!kvm_request_pending(vcpu))
2783 return 0;
2784 /*
2785 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2786 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2787 * This ensures that the ipte instruction for this request has
2788 * already finished. We might race against a second unmapper that
2789 * wants to set the blocking bit. Let's just retry the request loop.
2790 */
2791 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2792 int rc;
2793 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2794 kvm_s390_get_prefix(vcpu),
2795 PAGE_SIZE * 2, PROT_WRITE);
2796 if (rc) {
2797 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2798 return rc;
2799 }
2800 goto retry;
2801 }
2802
2803 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2804 vcpu->arch.sie_block->ihcpu = 0xffff;
2805 goto retry;
2806 }
2807
2808 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2809 if (!ibs_enabled(vcpu)) {
2810 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2811 atomic_or(CPUSTAT_IBS,
2812 &vcpu->arch.sie_block->cpuflags);
2813 }
2814 goto retry;
2815 }
2816
2817 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2818 if (ibs_enabled(vcpu)) {
2819 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2820 atomic_andnot(CPUSTAT_IBS,
2821 &vcpu->arch.sie_block->cpuflags);
2822 }
2823 goto retry;
2824 }
2825
2826 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2827 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2828 goto retry;
2829 }
2830
2831 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2832 /*
2833 * Disable CMMA virtualization; we will emulate the ESSA
2834 * instruction manually, in order to provide additional
2835 * functionalities needed for live migration.
2836 */
2837 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2838 goto retry;
2839 }
2840
2841 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2842 /*
2843 * Re-enable CMMA virtualization if CMMA is available and
2844 * was used.
2845 */
2846 if ((vcpu->kvm->arch.use_cmma) &&
2847 (vcpu->kvm->mm->context.use_cmma))
2848 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2849 goto retry;
2850 }
2851
2852 /* nothing to do, just clear the request */
2853 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2854
2855 return 0;
2856 }
2857
2858 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2859 {
2860 struct kvm_vcpu *vcpu;
2861 int i;
2862
2863 mutex_lock(&kvm->lock);
2864 preempt_disable();
2865 kvm->arch.epoch = tod - get_tod_clock();
2866 kvm_s390_vcpu_block_all(kvm);
2867 kvm_for_each_vcpu(i, vcpu, kvm)
2868 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2869 kvm_s390_vcpu_unblock_all(kvm);
2870 preempt_enable();
2871 mutex_unlock(&kvm->lock);
2872 }
2873
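/*
 * kvm_s390_set_tod_clock() stores the difference between the requested
 * guest TOD and the current host TOD in kvm->arch.epoch.  The SIE adds
 * this epoch to the host clock, so the guest observes
 *
 *	guest_tod = host_tod + kvm->arch.epoch
 *
 * which is why a single delta is written to all SIE blocks while the
 * VCPUs are blocked.
 */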
2874 /**
2875 * kvm_arch_fault_in_page - fault-in guest page if necessary
2876 * @vcpu: The corresponding virtual cpu
2877 * @gpa: Guest physical address
2878 * @writable: Whether the page should be writable or not
2879 *
2880 * Make sure that a guest page has been faulted-in on the host.
2881 *
2882 * Return: Zero on success, negative error code otherwise.
2883 */
2884 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2885 {
2886 return gmap_fault(vcpu->arch.gmap, gpa,
2887 writable ? FAULT_FLAG_WRITE : 0);
2888 }
2889
2890 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2891 unsigned long token)
2892 {
2893 struct kvm_s390_interrupt inti;
2894 struct kvm_s390_irq irq;
2895
2896 if (start_token) {
2897 irq.u.ext.ext_params2 = token;
2898 irq.type = KVM_S390_INT_PFAULT_INIT;
2899 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2900 } else {
2901 inti.type = KVM_S390_INT_PFAULT_DONE;
2902 inti.parm64 = token;
2903 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2904 }
2905 }
2906
2907 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2908 struct kvm_async_pf *work)
2909 {
2910 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2911 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2912 }
2913
2914 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2915 struct kvm_async_pf *work)
2916 {
2917 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2918 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2919 }
2920
2921 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2922 struct kvm_async_pf *work)
2923 {
2924 /* s390 will always inject the page directly */
2925 }
2926
2927 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2928 {
2929 /*
2930 * s390 will always inject the page directly,
2931 * but we still want kvm_check_async_pf_completion() to clean up
2932 */
2933 return true;
2934 }
2935
2936 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2937 {
2938 hva_t hva;
2939 struct kvm_arch_async_pf arch;
2940 int rc;
2941
2942 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2943 return 0;
2944 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2945 vcpu->arch.pfault_compare)
2946 return 0;
2947 if (psw_extint_disabled(vcpu))
2948 return 0;
2949 if (kvm_s390_vcpu_has_irq(vcpu, 0))
2950 return 0;
2951 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2952 return 0;
2953 if (!vcpu->arch.gmap->pfault_enabled)
2954 return 0;
2955
2956 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2957 hva += current->thread.gmap_addr & ~PAGE_MASK;
2958 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2959 return 0;
2960
2961 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2962 return rc;
2963 }
2964
2965 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2966 {
2967 int rc, cpuflags;
2968
2969 /*
2970 * On s390 notifications for arriving pages will be delivered directly
2971 * to the guest but the housekeeping for completed pfaults is
2972 * handled outside the worker.
2973 */
2974 kvm_check_async_pf_completion(vcpu);
2975
2976 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2977 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2978
2979 if (need_resched())
2980 schedule();
2981
2982 if (test_cpu_flag(CIF_MCCK_PENDING))
2983 s390_handle_mcck();
2984
2985 if (!kvm_is_ucontrol(vcpu->kvm)) {
2986 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2987 if (rc)
2988 return rc;
2989 }
2990
2991 rc = kvm_s390_handle_requests(vcpu);
2992 if (rc)
2993 return rc;
2994
2995 if (guestdbg_enabled(vcpu)) {
2996 kvm_s390_backup_guest_per_regs(vcpu);
2997 kvm_s390_patch_guest_per_regs(vcpu);
2998 }
2999
3000 vcpu->arch.sie_block->icptcode = 0;
3001 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3002 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3003 trace_kvm_s390_sie_enter(vcpu, cpuflags);
3004
3005 return 0;
3006 }
3007
3008 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3009 {
3010 struct kvm_s390_pgm_info pgm_info = {
3011 .code = PGM_ADDRESSING,
3012 };
3013 u8 opcode, ilen;
3014 int rc;
3015
3016 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3017 trace_kvm_s390_sie_fault(vcpu);
3018
3019 /*
3020 * We want to inject an addressing exception, which is defined as a
3021 * suppressing or terminating exception. However, since we came here
3022 * by a DAT access exception, the PSW still points to the faulting
3023 * instruction since DAT exceptions are nullifying. So we've got
3024 * to look up the current opcode to get the length of the instruction
3025 * to be able to forward the PSW.
3026 */
3027 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3028 ilen = insn_length(opcode);
3029 if (rc < 0) {
3030 return rc;
3031 } else if (rc) {
3032 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3033 * Forward by arbitrary ilc, injection will take care of
3034 * nullification if necessary.
3035 */
3036 pgm_info = vcpu->arch.pgm;
3037 ilen = 4;
3038 }
3039 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3040 kvm_s390_forward_psw(vcpu, ilen);
3041 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3042 }
3043
3044 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3045 {
3046 struct mcck_volatile_info *mcck_info;
3047 struct sie_page *sie_page;
3048
3049 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3050 vcpu->arch.sie_block->icptcode);
3051 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3052
3053 if (guestdbg_enabled(vcpu))
3054 kvm_s390_restore_guest_per_regs(vcpu);
3055
3056 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3057 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3058
3059 if (exit_reason == -EINTR) {
3060 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3061 sie_page = container_of(vcpu->arch.sie_block,
3062 struct sie_page, sie_block);
3063 mcck_info = &sie_page->mcck_info;
3064 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3065 return 0;
3066 }
3067
3068 if (vcpu->arch.sie_block->icptcode > 0) {
3069 int rc = kvm_handle_sie_intercept(vcpu);
3070
3071 if (rc != -EOPNOTSUPP)
3072 return rc;
3073 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3074 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3075 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3076 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3077 return -EREMOTE;
3078 } else if (exit_reason != -EFAULT) {
3079 vcpu->stat.exit_null++;
3080 return 0;
3081 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3082 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3083 vcpu->run->s390_ucontrol.trans_exc_code =
3084 current->thread.gmap_addr;
3085 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3086 return -EREMOTE;
3087 } else if (current->thread.gmap_pfault) {
3088 trace_kvm_s390_major_guest_pfault(vcpu);
3089 current->thread.gmap_pfault = 0;
3090 if (kvm_arch_setup_async_pf(vcpu))
3091 return 0;
3092 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3093 }
3094 return vcpu_post_run_fault_in_sie(vcpu);
3095 }
3096
3097 static int __vcpu_run(struct kvm_vcpu *vcpu)
3098 {
3099 int rc, exit_reason;
3100
3101 /*
3102 * We try to hold kvm->srcu during most of vcpu_run (except when
3103 * running the guest), so that memslots (and other stuff) are protected
3104 */
3105 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3106
3107 do {
3108 rc = vcpu_pre_run(vcpu);
3109 if (rc)
3110 break;
3111
3112 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3113 /*
3114 * As PF_VCPU will be used in the fault handler, no uaccess is allowed
3115 * between guest_enter and guest_exit.
3116 */
3117 local_irq_disable();
3118 guest_enter_irqoff();
3119 __disable_cpu_timer_accounting(vcpu);
3120 local_irq_enable();
3121 exit_reason = sie64a(vcpu->arch.sie_block,
3122 vcpu->run->s.regs.gprs);
3123 local_irq_disable();
3124 __enable_cpu_timer_accounting(vcpu);
3125 guest_exit_irqoff();
3126 local_irq_enable();
3127 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3128
3129 rc = vcpu_post_run(vcpu, exit_reason);
3130 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3131
3132 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3133 return rc;
3134 }
3135
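/*
 * __vcpu_run() is the inner loop of the run ioctl: vcpu_pre_run()
 * delivers pending interrupts and requests, sie64a() enters SIE with
 * the guest gprs, and vcpu_post_run() turns the exit reason into an
 * in-kernel handled intercept, a userspace exit (-EREMOTE) or a fault
 * to be resolved.  The loop stops on signals, pending guest-debug
 * exits or errors.
 */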
3136 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3137 {
3138 struct runtime_instr_cb *riccb;
3139 struct gs_cb *gscb;
3140
3141 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3142 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3143 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3144 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3145 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3146 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3147 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3148 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3149 /* some control register changes require a tlb flush */
3150 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3151 }
3152 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3153 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3154 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3155 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3156 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3157 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3158 }
3159 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3160 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3161 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3162 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3163 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3164 kvm_clear_async_pf_completion_queue(vcpu);
3165 }
3166 /*
3167 * If userspace sets the riccb (e.g. after migration) to a valid state,
3168 * we should enable RI here instead of doing the lazy enablement.
3169 */
3170 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3171 test_kvm_facility(vcpu->kvm, 64) &&
3172 riccb->valid &&
3173 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3174 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3175 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3176 }
3177 /*
3178 * If userspace sets the gscb (e.g. after migration) to non-zero,
3179 * we should enable GS here instead of doing the lazy enablement.
3180 */
3181 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3182 test_kvm_facility(vcpu->kvm, 133) &&
3183 gscb->gssm &&
3184 !vcpu->arch.gs_enabled) {
3185 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3186 vcpu->arch.sie_block->ecb |= ECB_GS;
3187 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3188 vcpu->arch.gs_enabled = 1;
3189 }
3190 save_access_regs(vcpu->arch.host_acrs);
3191 restore_access_regs(vcpu->run->s.regs.acrs);
3192 /* save host (userspace) fprs/vrs */
3193 save_fpu_regs();
3194 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3195 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3196 if (MACHINE_HAS_VX)
3197 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3198 else
3199 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3200 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3201 if (test_fp_ctl(current->thread.fpu.fpc))
3202 /* User space provided an invalid FPC, let's clear it */
3203 current->thread.fpu.fpc = 0;
3204 if (MACHINE_HAS_GS) {
3205 preempt_disable();
3206 __ctl_set_bit(2, 4);
3207 if (current->thread.gs_cb) {
3208 vcpu->arch.host_gscb = current->thread.gs_cb;
3209 save_gs_cb(vcpu->arch.host_gscb);
3210 }
3211 if (vcpu->arch.gs_enabled) {
3212 current->thread.gs_cb = (struct gs_cb *)
3213 &vcpu->run->s.regs.gscb;
3214 restore_gs_cb(current->thread.gs_cb);
3215 }
3216 preempt_enable();
3217 }
3218
3219 kvm_run->kvm_dirty_regs = 0;
3220 }
3221
3222 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3223 {
3224 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3225 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3226 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3227 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3228 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3229 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3230 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3231 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3232 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3233 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3234 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3235 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3236 save_access_regs(vcpu->run->s.regs.acrs);
3237 restore_access_regs(vcpu->arch.host_acrs);
3238 /* Save guest register state */
3239 save_fpu_regs();
3240 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3241 /* Restore will be done lazily at return to userspace */
3242 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3243 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3244 if (MACHINE_HAS_GS) {
3245 __ctl_set_bit(2, 4);
3246 if (vcpu->arch.gs_enabled)
3247 save_gs_cb(current->thread.gs_cb);
3248 preempt_disable();
3249 current->thread.gs_cb = vcpu->arch.host_gscb;
3250 restore_gs_cb(vcpu->arch.host_gscb);
3251 preempt_enable();
3252 if (!vcpu->arch.host_gscb)
3253 __ctl_clear_bit(2, 4);
3254 vcpu->arch.host_gscb = NULL;
3255 }
3256
3257 }
3258
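/*
 * sync_regs() and store_regs() bracket every KVM_RUN: the former moves
 * register state that userspace marked dirty in kvm_run into the SIE
 * block and the host thread (including the lazy fprs/vrs and guarded
 * storage switch), the latter copies it back before returning.  This
 * is also why kvm_s390_vcpu_store_status() below has to call
 * save_fpu_regs() first - the guest fprs/acrs may still live in the
 * host registers.
 */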
3259 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3260 {
3261 int rc;
3262 sigset_t sigsaved;
3263
3264 if (kvm_run->immediate_exit)
3265 return -EINTR;
3266
3267 if (guestdbg_exit_pending(vcpu)) {
3268 kvm_s390_prepare_debug_exit(vcpu);
3269 return 0;
3270 }
3271
3272 if (vcpu->sigset_active)
3273 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
3274
3275 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3276 kvm_s390_vcpu_start(vcpu);
3277 } else if (is_vcpu_stopped(vcpu)) {
3278 pr_err_ratelimited("can't run stopped vcpu %d\n",
3279 vcpu->vcpu_id);
3280 return -EINVAL;
3281 }
3282
3283 sync_regs(vcpu, kvm_run);
3284 enable_cpu_timer_accounting(vcpu);
3285
3286 might_fault();
3287 rc = __vcpu_run(vcpu);
3288
3289 if (signal_pending(current) && !rc) {
3290 kvm_run->exit_reason = KVM_EXIT_INTR;
3291 rc = -EINTR;
3292 }
3293
3294 if (guestdbg_exit_pending(vcpu) && !rc) {
3295 kvm_s390_prepare_debug_exit(vcpu);
3296 rc = 0;
3297 }
3298
3299 if (rc == -EREMOTE) {
3300 /* userspace support is needed, kvm_run has been prepared */
3301 rc = 0;
3302 }
3303
3304 disable_cpu_timer_accounting(vcpu);
3305 store_regs(vcpu, kvm_run);
3306
3307 if (vcpu->sigset_active)
3308 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
3309
3310 vcpu->stat.exit_userspace++;
3311 return rc;
3312 }
3313
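/*
 * Typical userspace flow driving the run ioctl above (a sketch; error
 * handling omitted, mmap_size obtained via KVM_GET_VCPU_MMAP_SIZE and
 * the fds via KVM_CREATE_VM / KVM_CREATE_VCPU, handle_sieic() being a
 * hypothetical userspace helper):
 *
 *	struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 *	while (ioctl(vcpu_fd, KVM_RUN, 0) == 0) {
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			handle_sieic(run);
 *	}
 */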
3314 /*
3315 * store status at address
3316 * we have two special cases:
3317 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3318 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3319 */
3320 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3321 {
3322 unsigned char archmode = 1;
3323 freg_t fprs[NUM_FPRS];
3324 unsigned int px;
3325 u64 clkcomp, cputm;
3326 int rc;
3327
3328 px = kvm_s390_get_prefix(vcpu);
3329 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3330 if (write_guest_abs(vcpu, 163, &archmode, 1))
3331 return -EFAULT;
3332 gpa = 0;
3333 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3334 if (write_guest_real(vcpu, 163, &archmode, 1))
3335 return -EFAULT;
3336 gpa = px;
3337 } else
3338 gpa -= __LC_FPREGS_SAVE_AREA;
3339
3340 /* manually convert vector registers if necessary */
3341 if (MACHINE_HAS_VX) {
3342 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3343 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3344 fprs, 128);
3345 } else {
3346 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3347 vcpu->run->s.regs.fprs, 128);
3348 }
3349 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3350 vcpu->run->s.regs.gprs, 128);
3351 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3352 &vcpu->arch.sie_block->gpsw, 16);
3353 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3354 &px, 4);
3355 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3356 &vcpu->run->s.regs.fpc, 4);
3357 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3358 &vcpu->arch.sie_block->todpr, 4);
3359 cputm = kvm_s390_get_cpu_timer(vcpu);
3360 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3361 &cputm, 8);
3362 clkcomp = vcpu->arch.sie_block->ckc >> 8;
3363 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3364 &clkcomp, 8);
3365 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3366 &vcpu->run->s.regs.acrs, 64);
3367 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3368 &vcpu->arch.sie_block->gcr, 128);
3369 return rc ? -EFAULT : 0;
3370 }
3371
3372 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3373 {
3374 /*
3375 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3376 * switch in the run ioctl. Let's update our copies before we save
3377 * them into the save area.
3378 */
3379 save_fpu_regs();
3380 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3381 save_access_regs(vcpu->run->s.regs.acrs);
3382
3383 return kvm_s390_store_status_unloaded(vcpu, addr);
3384 }
3385
3386 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3387 {
3388 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3389 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3390 }
3391
3392 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3393 {
3394 unsigned int i;
3395 struct kvm_vcpu *vcpu;
3396
3397 kvm_for_each_vcpu(i, vcpu, kvm) {
3398 __disable_ibs_on_vcpu(vcpu);
3399 }
3400 }
3401
3402 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3403 {
3404 if (!sclp.has_ibs)
3405 return;
3406 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3407 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3408 }
3409
3410 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3411 {
3412 int i, online_vcpus, started_vcpus = 0;
3413
3414 if (!is_vcpu_stopped(vcpu))
3415 return;
3416
3417 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3418 /* Only one cpu at a time may enter/leave the STOPPED state. */
3419 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3420 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3421
3422 for (i = 0; i < online_vcpus; i++) {
3423 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3424 started_vcpus++;
3425 }
3426
3427 if (started_vcpus == 0) {
3428 /* we're the only active VCPU -> speed it up */
3429 __enable_ibs_on_vcpu(vcpu);
3430 } else if (started_vcpus == 1) {
3431 /*
3432 * As we are starting a second VCPU, we have to disable
3433 * the IBS facility on all VCPUs to remove potentially
3434 * outstanding ENABLE requests.
3435 */
3436 __disable_ibs_on_all_vcpus(vcpu->kvm);
3437 }
3438
3439 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3440 /*
3441 * Another VCPU might have used IBS while we were offline.
3442 * Let's play safe and flush the VCPU at startup.
3443 */
3444 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3445 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3446 return;
3447 }
3448
3449 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3450 {
3451 int i, online_vcpus, started_vcpus = 0;
3452 struct kvm_vcpu *started_vcpu = NULL;
3453
3454 if (is_vcpu_stopped(vcpu))
3455 return;
3456
3457 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3458 /* Only one cpu at a time may enter/leave the STOPPED state. */
3459 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3460 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3461
3462 /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3463 kvm_s390_clear_stop_irq(vcpu);
3464
3465 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3466 __disable_ibs_on_vcpu(vcpu);
3467
3468 for (i = 0; i < online_vcpus; i++) {
3469 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3470 started_vcpus++;
3471 started_vcpu = vcpu->kvm->vcpus[i];
3472 }
3473 }
3474
3475 if (started_vcpus == 1) {
3476 /*
3477 * As we only have one VCPU left, we want to enable the
3478 * IBS facility for that VCPU to speed it up.
3479 */
3480 __enable_ibs_on_vcpu(started_vcpu);
3481 }
3482
3483 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3484 return;
3485 }
3486
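/*
 * The start/stop handlers above keep IBS enabled only while exactly one
 * VCPU is running: starting a second VCPU disables it on all VCPUs, and
 * stopping down to a single running VCPU re-enables it for that one.
 */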
3487 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3488 struct kvm_enable_cap *cap)
3489 {
3490 int r;
3491
3492 if (cap->flags)
3493 return -EINVAL;
3494
3495 switch (cap->cap) {
3496 case KVM_CAP_S390_CSS_SUPPORT:
3497 if (!vcpu->kvm->arch.css_support) {
3498 vcpu->kvm->arch.css_support = 1;
3499 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3500 trace_kvm_s390_enable_css(vcpu->kvm);
3501 }
3502 r = 0;
3503 break;
3504 default:
3505 r = -EINVAL;
3506 break;
3507 }
3508 return r;
3509 }
3510
3511 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3512 struct kvm_s390_mem_op *mop)
3513 {
3514 void __user *uaddr = (void __user *)mop->buf;
3515 void *tmpbuf = NULL;
3516 int r, srcu_idx;
3517 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3518 | KVM_S390_MEMOP_F_CHECK_ONLY;
3519
3520 if (mop->flags & ~supported_flags)
3521 return -EINVAL;
3522
3523 if (mop->size > MEM_OP_MAX_SIZE)
3524 return -E2BIG;
3525
3526 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3527 tmpbuf = vmalloc(mop->size);
3528 if (!tmpbuf)
3529 return -ENOMEM;
3530 }
3531
3532 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3533
3534 switch (mop->op) {
3535 case KVM_S390_MEMOP_LOGICAL_READ:
3536 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3537 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3538 mop->size, GACC_FETCH);
3539 break;
3540 }
3541 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3542 if (r == 0) {
3543 if (copy_to_user(uaddr, tmpbuf, mop->size))
3544 r = -EFAULT;
3545 }
3546 break;
3547 case KVM_S390_MEMOP_LOGICAL_WRITE:
3548 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3549 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3550 mop->size, GACC_STORE);
3551 break;
3552 }
3553 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3554 r = -EFAULT;
3555 break;
3556 }
3557 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3558 break;
3559 default:
3560 r = -EINVAL;
3561 }
3562
3563 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3564
3565 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3566 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3567
3568 vfree(tmpbuf);
3569 return r;
3570 }
3571
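/*
 * Example (a sketch; guest_addr is whatever logical guest address the
 * caller wants to read, error handling omitted) of a read through the
 * mem_op handler above, issued on a vcpu fd:
 *
 *	__u8 buf[512];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = sizeof(buf),
 *		.buf   = (__u64) buf,
 *		.ar    = 0,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */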
3572 long kvm_arch_vcpu_ioctl(struct file *filp,
3573 unsigned int ioctl, unsigned long arg)
3574 {
3575 struct kvm_vcpu *vcpu = filp->private_data;
3576 void __user *argp = (void __user *)arg;
3577 int idx;
3578 long r;
3579
3580 switch (ioctl) {
3581 case KVM_S390_IRQ: {
3582 struct kvm_s390_irq s390irq;
3583
3584 r = -EFAULT;
3585 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3586 break;
3587 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3588 break;
3589 }
3590 case KVM_S390_INTERRUPT: {
3591 struct kvm_s390_interrupt s390int;
3592 struct kvm_s390_irq s390irq;
3593
3594 r = -EFAULT;
3595 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3596 break;
3597 if (s390int_to_s390irq(&s390int, &s390irq))
3598 return -EINVAL;
3599 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3600 break;
3601 }
3602 case KVM_S390_STORE_STATUS:
3603 idx = srcu_read_lock(&vcpu->kvm->srcu);
3604 r = kvm_s390_vcpu_store_status(vcpu, arg);
3605 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3606 break;
3607 case KVM_S390_SET_INITIAL_PSW: {
3608 psw_t psw;
3609
3610 r = -EFAULT;
3611 if (copy_from_user(&psw, argp, sizeof(psw)))
3612 break;
3613 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3614 break;
3615 }
3616 case KVM_S390_INITIAL_RESET:
3617 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3618 break;
3619 case KVM_SET_ONE_REG:
3620 case KVM_GET_ONE_REG: {
3621 struct kvm_one_reg reg;
3622 r = -EFAULT;
3623 if (copy_from_user(&reg, argp, sizeof(reg)))
3624 break;
3625 if (ioctl == KVM_SET_ONE_REG)
3626 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3627 else
3628 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3629 break;
3630 }
3631 #ifdef CONFIG_KVM_S390_UCONTROL
3632 case KVM_S390_UCAS_MAP: {
3633 struct kvm_s390_ucas_mapping ucasmap;
3634
3635 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3636 r = -EFAULT;
3637 break;
3638 }
3639
3640 if (!kvm_is_ucontrol(vcpu->kvm)) {
3641 r = -EINVAL;
3642 break;
3643 }
3644
3645 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3646 ucasmap.vcpu_addr, ucasmap.length);
3647 break;
3648 }
3649 case KVM_S390_UCAS_UNMAP: {
3650 struct kvm_s390_ucas_mapping ucasmap;
3651
3652 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3653 r = -EFAULT;
3654 break;
3655 }
3656
3657 if (!kvm_is_ucontrol(vcpu->kvm)) {
3658 r = -EINVAL;
3659 break;
3660 }
3661
3662 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3663 ucasmap.length);
3664 break;
3665 }
3666 #endif
3667 case KVM_S390_VCPU_FAULT: {
3668 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3669 break;
3670 }
3671 case KVM_ENABLE_CAP:
3672 {
3673 struct kvm_enable_cap cap;
3674 r = -EFAULT;
3675 if (copy_from_user(&cap, argp, sizeof(cap)))
3676 break;
3677 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3678 break;
3679 }
3680 case KVM_S390_MEM_OP: {
3681 struct kvm_s390_mem_op mem_op;
3682
3683 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3684 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3685 else
3686 r = -EFAULT;
3687 break;
3688 }
3689 case KVM_S390_SET_IRQ_STATE: {
3690 struct kvm_s390_irq_state irq_state;
3691
3692 r = -EFAULT;
3693 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3694 break;
3695 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3696 irq_state.len == 0 ||
3697 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3698 r = -EINVAL;
3699 break;
3700 }
3701 r = kvm_s390_set_irq_state(vcpu,
3702 (void __user *) irq_state.buf,
3703 irq_state.len);
3704 break;
3705 }
3706 case KVM_S390_GET_IRQ_STATE: {
3707 struct kvm_s390_irq_state irq_state;
3708
3709 r = -EFAULT;
3710 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3711 break;
3712 if (irq_state.len == 0) {
3713 r = -EINVAL;
3714 break;
3715 }
3716 r = kvm_s390_get_irq_state(vcpu,
3717 (__u8 __user *) irq_state.buf,
3718 irq_state.len);
3719 break;
3720 }
3721 default:
3722 r = -ENOTTY;
3723 }
3724 return r;
3725 }
3726
3727 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3728 {
3729 #ifdef CONFIG_KVM_S390_UCONTROL
3730 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3731 && (kvm_is_ucontrol(vcpu->kvm))) {
3732 vmf->page = virt_to_page(vcpu->arch.sie_block);
3733 get_page(vmf->page);
3734 return 0;
3735 }
3736 #endif
3737 return VM_FAULT_SIGBUS;
3738 }
3739
3740 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3741 unsigned long npages)
3742 {
3743 return 0;
3744 }
3745
3746 /* Section: memory related */
3747 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3748 struct kvm_memory_slot *memslot,
3749 const struct kvm_userspace_memory_region *mem,
3750 enum kvm_mr_change change)
3751 {
3752 /* A few sanity checks. Memory slots have to start and end on a segment
3753 boundary (1 MB). The memory in userland may be fragmented into various
3754 different vmas. It is okay to mmap() and munmap() memory in this slot
3755 after doing this call at any time */
3756
3757 if (mem->userspace_addr & 0xffffful)
3758 return -EINVAL;
3759
3760 if (mem->memory_size & 0xffffful)
3761 return -EINVAL;
3762
3763 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3764 return -EINVAL;
3765
3766 return 0;
3767 }
3768
3769 void kvm_arch_commit_memory_region(struct kvm *kvm,
3770 const struct kvm_userspace_memory_region *mem,
3771 const struct kvm_memory_slot *old,
3772 const struct kvm_memory_slot *new,
3773 enum kvm_mr_change change)
3774 {
3775 int rc;
3776
3777 /* If the basics of the memslot do not change, we do not want
3778 * to update the gmap. Every update causes several unnecessary
3779 * segment translation exceptions. This is usually handled just
3780 * fine by the normal fault handler + gmap, but it will also
3781 * cause faults on the prefix page of running guest CPUs.
3782 */
3783 if (old->userspace_addr == mem->userspace_addr &&
3784 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3785 old->npages * PAGE_SIZE == mem->memory_size)
3786 return;
3787
3788 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3789 mem->guest_phys_addr, mem->memory_size);
3790 if (rc)
3791 pr_warn("failed to commit memory region\n");
3792 return;
3793 }
3794
3795 static inline unsigned long nonhyp_mask(int i)
3796 {
3797 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3798
3799 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3800 }
3801
3802 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3803 {
3804 vcpu->valid_wakeup = false;
3805 }
3806
3807 static int __init kvm_s390_init(void)
3808 {
3809 int i;
3810
3811 if (!sclp.has_sief2) {
3812 pr_info("SIE not available\n");
3813 return -ENODEV;
3814 }
3815
3816 for (i = 0; i < 16; i++)
3817 kvm_s390_fac_list_mask[i] |=
3818 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3819
3820 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3821 }
3822
3823 static void __exit kvm_s390_exit(void)
3824 {
3825 kvm_exit();
3826 }
3827
3828 module_init(kvm_s390_init);
3829 module_exit(kvm_s390_exit);
3830
3831 /*
3832 * Enable autoloading of the kvm module.
3833 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3834 * since x86 takes a different approach.
3835 */
3836 #include <linux/miscdevice.h>
3837 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3838 MODULE_ALIAS("devname:kvm");