arch/s390/kvm/kvm-s390.c

   1 /*
   2  * hosting zSeries kernel virtual machines
   3  *
   4  * Copyright IBM Corp. 2008, 2009
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License (version 2 only)
   8  * as published by the Free Software Foundation.
   9  *
  10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  11  *               Christian Borntraeger <borntraeger@de.ibm.com>
  12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  14  *               Jason J. Herne <jjherne@us.ibm.com>
  15  */
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/mman.h>
  25 #include <linux/module.h>
  26 #include <linux/random.h>
  27 #include <linux/slab.h>
  28 #include <linux/timer.h>
  29 #include <linux/vmalloc.h>
  30 #include <linux/bitmap.h>
  31 #include <asm/asm-offsets.h>
  32 #include <asm/lowcore.h>
  33 #include <asm/stp.h>
  34 #include <asm/pgtable.h>
  35 #include <asm/gmap.h>
  36 #include <asm/nmi.h>
  37 #include <asm/switch_to.h>
  38 #include <asm/isc.h>
  39 #include <asm/sclp.h>
  40 #include <asm/cpacf.h>
  41 #include <asm/timex.h>
  42 #include "kvm-s390.h"
  43 #include "gaccess.h"
  44
  45 #define KMSG_COMPONENT "kvm-s390"
  46 #undef pr_fmt
  47 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  48
  49 #define CREATE_TRACE_POINTS
  50 #include "trace.h"
  51 #include "trace-s390.h"
  52
  53 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  54 #define LOCAL_IRQS 32
  55 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  56                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  57
  58 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  59
  60 struct kvm_stats_debugfs_item debugfs_entries[] = {
  61         { "userspace_handled", VCPU_STAT(exit_userspace) },
  62         { "exit_null", VCPU_STAT(exit_null) },
  63         { "exit_validity", VCPU_STAT(exit_validity) },
  64         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  65         { "exit_external_request", VCPU_STAT(exit_external_request) },
  66         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  67         { "exit_instruction", VCPU_STAT(exit_instruction) },
  68         { "exit_pei", VCPU_STAT(exit_pei) },
  69         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  70         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  71         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
  72         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  73         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  74         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
  75         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  76         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  77         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  78         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  79         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  80         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  81         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  82         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  83         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
  84         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  85         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  86         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  87         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
  88         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  89         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
  90         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
  91         { "instruction_spx", VCPU_STAT(instruction_spx) },
  92         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
  93         { "instruction_stap", VCPU_STAT(instruction_stap) },
  94         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
  95         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
  96         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
  97         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
  98         { "instruction_essa", VCPU_STAT(instruction_essa) },
  99         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
 100         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
 101         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
 102         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
 103         { "instruction_sie", VCPU_STAT(instruction_sie) },
 104         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
 105         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 106         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 107         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
 108         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
 109         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 110         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 111         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 112         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 113         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 114         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 115         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 116         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 117         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 118         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 119         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 120         { "diagnose_10", VCPU_STAT(diagnose_10) },
 121         { "diagnose_44", VCPU_STAT(diagnose_44) },
 122         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
 123         { "diagnose_258", VCPU_STAT(diagnose_258) },
 124         { "diagnose_308", VCPU_STAT(diagnose_308) },
 125         { "diagnose_500", VCPU_STAT(diagnose_500) },
 126         { NULL }
 127 };
 128
 129 /* allow nested virtualization in KVM (if enabled by user space) */
 130 static int nested;
 131 module_param(nested, int, S_IRUGO);
 132 MODULE_PARM_DESC(nested, "Nested virtualization support");
 133
 134 /* upper facilities limit for kvm */
 135 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
 136
 137 unsigned long kvm_s390_fac_list_mask_size(void)
 138 {
 139         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
 140         return ARRAY_SIZE(kvm_s390_fac_list_mask);
 141 }
 142
 143 /* available cpu features supported by kvm */
 144 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 145 /* available subfunctions indicated via query / "test bit" */
 146 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 147
 148 static struct gmap_notifier gmap_notifier;
 149 static struct gmap_notifier vsie_gmap_notifier;
 150 debug_info_t *kvm_s390_dbf;
 151
 152 /* Section: not file related */
 153 int kvm_arch_hardware_enable(void)
 154 {
 155         /* every s390 is virtualization enabled ;-) */
 156         return 0;
 157 }
 158
 159 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 160                               unsigned long end);
 161
 162 /*
 163  * This callback is executed during stop_machine(). All CPUs are therefore
 164  * temporarily stopped. In order not to change guest behavior, we have to
 165  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 166  * so a CPU won't be stopped while calculating with the epoch.
 167  */
 168 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 169                           void *v)
 170 {
 171         struct kvm *kvm;
 172         struct kvm_vcpu *vcpu;
 173         int i;
 174         unsigned long long *delta = v;
 175
 176         list_for_each_entry(kvm, &vm_list, vm_list) {
 177                 kvm->arch.epoch -= *delta;
 178                 kvm_for_each_vcpu(i, vcpu, kvm) {
 179                         vcpu->arch.sie_block->epoch -= *delta;
 180                         if (vcpu->arch.cputm_enabled)
 181                                 vcpu->arch.cputm_start += *delta;
 182                         if (vcpu->arch.vsie_block)
 183                                 vcpu->arch.vsie_block->epoch -= *delta;
 184                 }
 185         }
 186         return NOTIFY_OK;
 187 }
 188
 189 static struct notifier_block kvm_clock_notifier = {
 190         .notifier_call = kvm_clock_sync,
 191 };
 192
 193 int kvm_arch_hardware_setup(void)
 194 {
 195         gmap_notifier.notifier_call = kvm_gmap_notifier;
 196         gmap_register_pte_notifier(&gmap_notifier);
 197         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 198         gmap_register_pte_notifier(&vsie_gmap_notifier);
 199         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 200                                        &kvm_clock_notifier);
 201         return 0;
 202 }
 203
 204 void kvm_arch_hardware_unsetup(void)
 205 {
 206         gmap_unregister_pte_notifier(&gmap_notifier);
 207         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 208         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 209                                          &kvm_clock_notifier);
 210 }
 211
 212 static void allow_cpu_feat(unsigned long nr)
 213 {
 214         set_bit_inv(nr, kvm_s390_available_cpu_feat);
 215 }
 216
 217 static inline int plo_test_bit(unsigned char nr)
 218 {
 219         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
 220         int cc = 3; /* subfunction not available */
 221
 222         asm volatile(
 223                 /* Parameter registers are ignored for "test bit" */
 224                 "       plo     0,0,0,0(0)\n"
 225                 "       ipm     %0\n"
 226                 "       srl     %0,28\n"
 227                 : "=d" (cc)
 228                 : "d" (r0)
 229                 : "cc");
 230         return cc == 0;
 231 }
 232
 233 static void kvm_s390_cpu_feat_init(void)
 234 {
 235         int i;
 236
 237         for (i = 0; i < 256; ++i) {
 238                 if (plo_test_bit(i))
 239                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 240         }
 241
 242         if (test_facility(28)) /* TOD-clock steering */
 243                 ptff(kvm_s390_available_subfunc.ptff,
 244                      sizeof(kvm_s390_available_subfunc.ptff),
 245                      PTFF_QAF);
 246
 247         if (test_facility(17)) { /* MSA */
 248                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 249                               kvm_s390_available_subfunc.kmac);
 250                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 251                               kvm_s390_available_subfunc.kmc);
 252                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 253                               kvm_s390_available_subfunc.km);
 254                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 255                               kvm_s390_available_subfunc.kimd);
 256                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 257                               kvm_s390_available_subfunc.klmd);
 258         }
 259         if (test_facility(76)) /* MSA3 */
 260                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 261                               kvm_s390_available_subfunc.pckmo);
 262         if (test_facility(77)) { /* MSA4 */
 263                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 264                               kvm_s390_available_subfunc.kmctr);
 265                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 266                               kvm_s390_available_subfunc.kmf);
 267                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 268                               kvm_s390_available_subfunc.kmo);
 269                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 270                               kvm_s390_available_subfunc.pcc);
 271         }
 272         if (test_facility(57)) /* MSA5 */
 273                 __cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
 274                               kvm_s390_available_subfunc.ppno);
 275
 276         if (MACHINE_HAS_ESOP)
 277                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 278         /*
 279          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 280          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 281          */
 282         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 283             !test_facility(3) || !nested)
 284                 return;
 285         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 286         if (sclp.has_64bscao)
 287                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 288         if (sclp.has_siif)
 289                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 290         if (sclp.has_gpere)
 291                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 292         if (sclp.has_gsls)
 293                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 294         if (sclp.has_ib)
 295                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 296         if (sclp.has_cei)
 297                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 298         if (sclp.has_ibs)
 299                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 300         /*
 301          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 302          * all skey handling functions read/set the skey from the PGSTE
 303          * instead of the real storage key.
 304          *
 305          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 306          * pages being detected as preserved although they are resident.
 307          *
 308          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 309          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 310          *
 311          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 312          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 313          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 314          *
 315          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 316          * cannot easily shadow the SCA because of the ipte lock.
 317          */
 318 }
 319
 320 int kvm_arch_init(void *opaque)
 321 {
 322         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 323         if (!kvm_s390_dbf)
 324                 return -ENOMEM;
 325
 326         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 327                 debug_unregister(kvm_s390_dbf);
 328                 return -ENOMEM;
 329         }
 330
 331         kvm_s390_cpu_feat_init();
 332
 333         /* Register floating interrupt controller interface. */
 334         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 335 }
 336
 337 void kvm_arch_exit(void)
 338 {
 339         debug_unregister(kvm_s390_dbf);
 340 }
 341
 342 /* Section: device related */
 343 long kvm_arch_dev_ioctl(struct file *filp,
 344                         unsigned int ioctl, unsigned long arg)
 345 {
 346         if (ioctl == KVM_S390_ENABLE_SIE)
 347                 return s390_enable_sie();
 348         return -EINVAL;
 349 }
 350
 351 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 352 {
 353         int r;
 354
 355         switch (ext) {
 356         case KVM_CAP_S390_PSW:
 357         case KVM_CAP_S390_GMAP:
 358         case KVM_CAP_SYNC_MMU:
 359 #ifdef CONFIG_KVM_S390_UCONTROL
 360         case KVM_CAP_S390_UCONTROL:
 361 #endif
 362         case KVM_CAP_ASYNC_PF:
 363         case KVM_CAP_SYNC_REGS:
 364         case KVM_CAP_ONE_REG:
 365         case KVM_CAP_ENABLE_CAP:
 366         case KVM_CAP_S390_CSS_SUPPORT:
 367         case KVM_CAP_IOEVENTFD:
 368         case KVM_CAP_DEVICE_CTRL:
 369         case KVM_CAP_ENABLE_CAP_VM:
 370         case KVM_CAP_S390_IRQCHIP:
 371         case KVM_CAP_VM_ATTRIBUTES:
 372         case KVM_CAP_MP_STATE:
 373         case KVM_CAP_S390_INJECT_IRQ:
 374         case KVM_CAP_S390_USER_SIGP:
 375         case KVM_CAP_S390_USER_STSI:
 376         case KVM_CAP_S390_SKEYS:
 377         case KVM_CAP_S390_IRQ_STATE:
 378         case KVM_CAP_S390_USER_INSTR0:
 379                 r = 1;
 380                 break;
 381         case KVM_CAP_S390_MEM_OP:
 382                 r = MEM_OP_MAX_SIZE;
 383                 break;
 384         case KVM_CAP_NR_VCPUS:
 385         case KVM_CAP_MAX_VCPUS:
 386                 r = KVM_S390_BSCA_CPU_SLOTS;
 387                 if (!kvm_s390_use_sca_entries())
 388                         r = KVM_MAX_VCPUS;
 389                 else if (sclp.has_esca && sclp.has_64bscao)
 390                         r = KVM_S390_ESCA_CPU_SLOTS;
 391                 break;
 392         case KVM_CAP_NR_MEMSLOTS:
 393                 r = KVM_USER_MEM_SLOTS;
 394                 break;
 395         case KVM_CAP_S390_COW:
 396                 r = MACHINE_HAS_ESOP;
 397                 break;
 398         case KVM_CAP_S390_VECTOR_REGISTERS:
 399                 r = MACHINE_HAS_VX;
 400                 break;
 401         case KVM_CAP_S390_RI:
 402                 r = test_facility(64);
 403                 break;
 404         default:
 405                 r = 0;
 406         }
 407         return r;
 408 }
 409
 410 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 411                                         struct kvm_memory_slot *memslot)
 412 {
 413         gfn_t cur_gfn, last_gfn;
 414         unsigned long address;
 415         struct gmap *gmap = kvm->arch.gmap;
 416
 417         /* Loop over all guest pages */
 418         last_gfn = memslot->base_gfn + memslot->npages;
 419         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 420                 address = gfn_to_hva_memslot(memslot, cur_gfn);
 421
 422                 if (test_and_clear_guest_dirty(gmap->mm, address))
 423                         mark_page_dirty(kvm, cur_gfn);
 424                 if (fatal_signal_pending(current))
 425                         return;
 426                 cond_resched();
 427         }
 428 }
 429
 430 /* Section: vm related */
 431 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 432
 433 /*
 434  * Get (and clear) the dirty memory log for a memory slot.
 435  */
 436 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 437                                struct kvm_dirty_log *log)
 438 {
 439         int r;
 440         unsigned long n;
 441         struct kvm_memslots *slots;
 442         struct kvm_memory_slot *memslot;
 443         int is_dirty = 0;
 444
 445         mutex_lock(&kvm->slots_lock);
 446
 447         r = -EINVAL;
 448         if (log->slot >= KVM_USER_MEM_SLOTS)
 449                 goto out;
 450
 451         slots = kvm_memslots(kvm);
 452         memslot = id_to_memslot(slots, log->slot);
 453         r = -ENOENT;
 454         if (!memslot->dirty_bitmap)
 455                 goto out;
 456
 457         kvm_s390_sync_dirty_log(kvm, memslot);
 458         r = kvm_get_dirty_log(kvm, log, &is_dirty);
 459         if (r)
 460                 goto out;
 461
 462         /* Clear the dirty log */
 463         if (is_dirty) {
 464                 n = kvm_dirty_bitmap_bytes(memslot);
 465                 memset(memslot->dirty_bitmap, 0, n);
 466         }
 467         r = 0;
 468 out:
 469         mutex_unlock(&kvm->slots_lock);
 470         return r;
 471 }
 472
 473 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 474 {
 475         unsigned int i;
 476         struct kvm_vcpu *vcpu;
 477
 478         kvm_for_each_vcpu(i, vcpu, kvm) {
 479                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 480         }
 481 }
 482
 483 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 484 {
 485         int r;
 486
 487         if (cap->flags)
 488                 return -EINVAL;
 489
 490         switch (cap->cap) {
 491         case KVM_CAP_S390_IRQCHIP:
 492                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 493                 kvm->arch.use_irqchip = 1;
 494                 r = 0;
 495                 break;
 496         case KVM_CAP_S390_USER_SIGP:
 497                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 498                 kvm->arch.user_sigp = 1;
 499                 r = 0;
 500                 break;
 501         case KVM_CAP_S390_VECTOR_REGISTERS:
 502                 mutex_lock(&kvm->lock);
 503                 if (kvm->created_vcpus) {
 504                         r = -EBUSY;
 505                 } else if (MACHINE_HAS_VX) {
 506                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 507                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 508                         r = 0;
 509                 } else
 510                         r = -EINVAL;
 511                 mutex_unlock(&kvm->lock);
 512                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 513                          r ? "(not available)" : "(success)");
 514                 break;
 515         case KVM_CAP_S390_RI:
 516                 r = -EINVAL;
 517                 mutex_lock(&kvm->lock);
 518                 if (kvm->created_vcpus) {
 519                         r = -EBUSY;
 520                 } else if (test_facility(64)) {
 521                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 522                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 523                         r = 0;
 524                 }
 525                 mutex_unlock(&kvm->lock);
 526                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 527                          r ? "(not available)" : "(success)");
 528                 break;
 529         case KVM_CAP_S390_USER_STSI:
 530                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 531                 kvm->arch.user_stsi = 1;
 532                 r = 0;
 533                 break;
 534         case KVM_CAP_S390_USER_INSTR0:
 535                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 536                 kvm->arch.user_instr0 = 1;
 537                 icpt_operexc_on_all_vcpus(kvm);
 538                 r = 0;
 539                 break;
 540         default:
 541                 r = -EINVAL;
 542                 break;
 543         }
 544         return r;
 545 }
 546
 547 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 548 {
 549         int ret;
 550
 551         switch (attr->attr) {
 552         case KVM_S390_VM_MEM_LIMIT_SIZE:
 553                 ret = 0;
 554                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 555                          kvm->arch.mem_limit);
 556                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 557                         ret = -EFAULT;
 558                 break;
 559         default:
 560                 ret = -ENXIO;
 561                 break;
 562         }
 563         return ret;
 564 }
 565
 566 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 567 {
 568         int ret;
 569         unsigned int idx;
 570         switch (attr->attr) {
 571         case KVM_S390_VM_MEM_ENABLE_CMMA:
 572                 ret = -ENXIO;
 573                 if (!sclp.has_cmma)
 574                         break;
 575
 576                 ret = -EBUSY;
 577                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 578                 mutex_lock(&kvm->lock);
 579                 if (!kvm->created_vcpus) {
 580                         kvm->arch.use_cmma = 1;
 581                         ret = 0;
 582                 }
 583                 mutex_unlock(&kvm->lock);
 584                 break;
 585         case KVM_S390_VM_MEM_CLR_CMMA:
 586                 ret = -ENXIO;
 587                 if (!sclp.has_cmma)
 588                         break;
 589                 ret = -EINVAL;
 590                 if (!kvm->arch.use_cmma)
 591                         break;
 592
 593                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 594                 mutex_lock(&kvm->lock);
 595                 idx = srcu_read_lock(&kvm->srcu);
 596                 s390_reset_cmma(kvm->arch.gmap->mm);
 597                 srcu_read_unlock(&kvm->srcu, idx);
 598                 mutex_unlock(&kvm->lock);
 599                 ret = 0;
 600                 break;
 601         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 602                 unsigned long new_limit;
 603
 604                 if (kvm_is_ucontrol(kvm))
 605                         return -EINVAL;
 606
 607                 if (get_user(new_limit, (u64 __user *)attr->addr))
 608                         return -EFAULT;
 609
 610                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 611                     new_limit > kvm->arch.mem_limit)
 612                         return -E2BIG;
 613
 614                 if (!new_limit)
 615                         return -EINVAL;
 616
 617                 /* gmap_create takes last usable address */
 618                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 619                         new_limit -= 1;
 620
 621                 ret = -EBUSY;
 622                 mutex_lock(&kvm->lock);
 623                 if (!kvm->created_vcpus) {
 624                         /* gmap_create will round the limit up */
 625                         struct gmap *new = gmap_create(current->mm, new_limit);
 626
 627                         if (!new) {
 628                                 ret = -ENOMEM;
 629                         } else {
 630                                 gmap_remove(kvm->arch.gmap);
 631                                 new->private = kvm;
 632                                 kvm->arch.gmap = new;
 633                                 ret = 0;
 634                         }
 635                 }
 636                 mutex_unlock(&kvm->lock);
 637                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 638                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 639                          (void *) kvm->arch.gmap->asce);
 640                 break;
 641         }
 642         default:
 643                 ret = -ENXIO;
 644                 break;
 645         }
 646         return ret;
 647 }
 648
 649 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 650
 651 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 652 {
 653         struct kvm_vcpu *vcpu;
 654         int i;
 655
 656         if (!test_kvm_facility(kvm, 76))
 657                 return -EINVAL;
 658
 659         mutex_lock(&kvm->lock);
 660         switch (attr->attr) {
 661         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 662                 get_random_bytes(
 663                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 664                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 665                 kvm->arch.crypto.aes_kw = 1;
 666                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 667                 break;
 668         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 669                 get_random_bytes(
 670                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 671                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 672                 kvm->arch.crypto.dea_kw = 1;
 673                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 674                 break;
 675         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 676                 kvm->arch.crypto.aes_kw = 0;
 677                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 678                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 679                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 680                 break;
 681         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 682                 kvm->arch.crypto.dea_kw = 0;
 683                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 684                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 685                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 686                 break;
 687         default:
 688                 mutex_unlock(&kvm->lock);
 689                 return -ENXIO;
 690         }
 691
 692         kvm_for_each_vcpu(i, vcpu, kvm) {
 693                 kvm_s390_vcpu_crypto_setup(vcpu);
 694                 exit_sie(vcpu);
 695         }
 696         mutex_unlock(&kvm->lock);
 697         return 0;
 698 }
 699
 700 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 701 {
 702         u8 gtod_high;
 703
 704         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
 705                                            sizeof(gtod_high)))
 706                 return -EFAULT;
 707
 708         if (gtod_high != 0)
 709                 return -EINVAL;
 710         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
 711
 712         return 0;
 713 }
 714
 715 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 716 {
 717         u64 gtod;
 718
 719         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 720                 return -EFAULT;
 721
 722         kvm_s390_set_tod_clock(kvm, gtod);
 723         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
 724         return 0;
 725 }
 726
 727 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 728 {
 729         int ret;
 730
 731         if (attr->flags)
 732                 return -EINVAL;
 733
 734         switch (attr->attr) {
 735         case KVM_S390_VM_TOD_HIGH:
 736                 ret = kvm_s390_set_tod_high(kvm, attr);
 737                 break;
 738         case KVM_S390_VM_TOD_LOW:
 739                 ret = kvm_s390_set_tod_low(kvm, attr);
 740                 break;
 741         default:
 742                 ret = -ENXIO;
 743                 break;
 744         }
 745         return ret;
 746 }
 747
 748 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 749 {
 750         u8 gtod_high = 0;
 751
 752         if (copy_to_user((void __user *)attr->addr, &gtod_high,
 753                                          sizeof(gtod_high)))
 754                 return -EFAULT;
 755         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
 756
 757         return 0;
 758 }
 759
 760 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 761 {
 762         u64 gtod;
 763
 764         gtod = kvm_s390_get_tod_clock_fast(kvm);
 765         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 766                 return -EFAULT;
 767         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
 768
 769         return 0;
 770 }
 771
 772 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 773 {
 774         int ret;
 775
 776         if (attr->flags)
 777                 return -EINVAL;
 778
 779         switch (attr->attr) {
 780         case KVM_S390_VM_TOD_HIGH:
 781                 ret = kvm_s390_get_tod_high(kvm, attr);
 782                 break;
 783         case KVM_S390_VM_TOD_LOW:
 784                 ret = kvm_s390_get_tod_low(kvm, attr);
 785                 break;
 786         default:
 787                 ret = -ENXIO;
 788                 break;
 789         }
 790         return ret;
 791 }
 792
 793 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 794 {
 795         struct kvm_s390_vm_cpu_processor *proc;
 796         u16 lowest_ibc, unblocked_ibc;
 797         int ret = 0;
 798
 799         mutex_lock(&kvm->lock);
 800         if (kvm->created_vcpus) {
 801                 ret = -EBUSY;
 802                 goto out;
 803         }
 804         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 805         if (!proc) {
 806                 ret = -ENOMEM;
 807                 goto out;
 808         }
 809         if (!copy_from_user(proc, (void __user *)attr->addr,
 810                             sizeof(*proc))) {
 811                 kvm->arch.model.cpuid = proc->cpuid;
 812                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
 813                 unblocked_ibc = sclp.ibc & 0xfff;
 814                 if (lowest_ibc && proc->ibc) {
 815                         if (proc->ibc > unblocked_ibc)
 816                                 kvm->arch.model.ibc = unblocked_ibc;
 817                         else if (proc->ibc < lowest_ibc)
 818                                 kvm->arch.model.ibc = lowest_ibc;
 819                         else
 820                                 kvm->arch.model.ibc = proc->ibc;
 821                 }
 822                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
 823                        S390_ARCH_FAC_LIST_SIZE_BYTE);
 824         } else
 825                 ret = -EFAULT;
 826         kfree(proc);
 827 out:
 828         mutex_unlock(&kvm->lock);
 829         return ret;
 830 }
 831
 832 static int kvm_s390_set_processor_feat(struct kvm *kvm,
 833                                        struct kvm_device_attr *attr)
 834 {
 835         struct kvm_s390_vm_cpu_feat data;
 836         int ret = -EBUSY;
 837
 838         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
 839                 return -EFAULT;
 840         if (!bitmap_subset((unsigned long *) data.feat,
 841                            kvm_s390_available_cpu_feat,
 842                            KVM_S390_VM_CPU_FEAT_NR_BITS))
 843                 return -EINVAL;
 844
 845         mutex_lock(&kvm->lock);
 846         if (!atomic_read(&kvm->online_vcpus)) {
 847                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
 848                             KVM_S390_VM_CPU_FEAT_NR_BITS);
 849                 ret = 0;
 850         }
 851         mutex_unlock(&kvm->lock);
 852         return ret;
 853 }
 854
/*
 * Placeholder for configuring CPU subfunctions from user space.
 * Not implemented: unconditionally returns -ENXIO.
 */
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
{
        /*
         * Once supported by kernel + hw, we have to store the subfunctions
         * in kvm->arch and remember that user space configured them.
         */
        return -ENXIO;
}
 864
 865 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 866 {
 867         int ret = -ENXIO;
 868
 869         switch (attr->attr) {
 870         case KVM_S390_VM_CPU_PROCESSOR:
 871                 ret = kvm_s390_set_processor(kvm, attr);
 872                 break;
 873         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 874                 ret = kvm_s390_set_processor_feat(kvm, attr);
 875                 break;
 876         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 877                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
 878                 break;
 879         }
 880         return ret;
 881 }
 882
 883 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 884 {
 885         struct kvm_s390_vm_cpu_processor *proc;
 886         int ret = 0;
 887
 888         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 889         if (!proc) {
 890                 ret = -ENOMEM;
 891                 goto out;
 892         }
 893         proc->cpuid = kvm->arch.model.cpuid;
 894         proc->ibc = kvm->arch.model.ibc;
 895         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
 896                S390_ARCH_FAC_LIST_SIZE_BYTE);
 897         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
 898                 ret = -EFAULT;
 899         kfree(proc);
 900 out:
 901         return ret;
 902 }
 903
 904 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
 905 {
 906         struct kvm_s390_vm_cpu_machine *mach;
 907         int ret = 0;
 908
 909         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
 910         if (!mach) {
 911                 ret = -ENOMEM;
 912                 goto out;
 913         }
 914         get_cpu_id((struct cpuid *) &mach->cpuid);
 915         mach->ibc = sclp.ibc;
 916         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
 917                S390_ARCH_FAC_LIST_SIZE_BYTE);
 918         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
 919                S390_ARCH_FAC_LIST_SIZE_BYTE);
 920         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
 921                 ret = -EFAULT;
 922         kfree(mach);
 923 out:
 924         return ret;
 925 }
 926
 927 static int kvm_s390_get_processor_feat(struct kvm *kvm,
 928                                        struct kvm_device_attr *attr)
 929 {
 930         struct kvm_s390_vm_cpu_feat data;
 931
 932         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
 933                     KVM_S390_VM_CPU_FEAT_NR_BITS);
 934         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
 935                 return -EFAULT;
 936         return 0;
 937 }
 938
 939 static int kvm_s390_get_machine_feat(struct kvm *kvm,
 940                                      struct kvm_device_attr *attr)
 941 {
 942         struct kvm_s390_vm_cpu_feat data;
 943
 944         bitmap_copy((unsigned long *) data.feat,
 945                     kvm_s390_available_cpu_feat,
 946                     KVM_S390_VM_CPU_FEAT_NR_BITS);
 947         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
 948                 return -EFAULT;
 949         return 0;
 950 }
 951
/*
 * Placeholder for querying the CPU subfunctions configured for the guest.
 * Not implemented: unconditionally returns -ENXIO.
 */
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
{
        /*
         * Once we can actually configure subfunctions (kernel + hw support),
         * we have to check if they were already set by user space, if so copy
         * them from kvm->arch.
         */
        return -ENXIO;
}
 962
 963 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
 964                                         struct kvm_device_attr *attr)
 965 {
 966         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
 967             sizeof(struct kvm_s390_vm_cpu_subfunc)))
 968                 return -EFAULT;
 969         return 0;
 970 }
 971 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 972 {
 973         int ret = -ENXIO;
 974
 975         switch (attr->attr) {
 976         case KVM_S390_VM_CPU_PROCESSOR:
 977                 ret = kvm_s390_get_processor(kvm, attr);
 978                 break;
 979         case KVM_S390_VM_CPU_MACHINE:
 980                 ret = kvm_s390_get_machine(kvm, attr);
 981                 break;
 982         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 983                 ret = kvm_s390_get_processor_feat(kvm, attr);
 984                 break;
 985         case KVM_S390_VM_CPU_MACHINE_FEAT:
 986                 ret = kvm_s390_get_machine_feat(kvm, attr);
 987                 break;
 988         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 989                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
 990                 break;
 991         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
 992                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
 993                 break;
 994         }
 995         return ret;
 996 }
 997
 998 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 999 {
1000         int ret;
1001
1002         switch (attr->group) {
1003         case KVM_S390_VM_MEM_CTRL:
1004                 ret = kvm_s390_set_mem_control(kvm, attr);
1005                 break;
1006         case KVM_S390_VM_TOD:
1007                 ret = kvm_s390_set_tod(kvm, attr);
1008                 break;
1009         case KVM_S390_VM_CPU_MODEL:
1010                 ret = kvm_s390_set_cpu_model(kvm, attr);
1011                 break;
1012         case KVM_S390_VM_CRYPTO:
1013                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1014                 break;
1015         default:
1016                 ret = -ENXIO;
1017                 break;
1018         }
1019
1020         return ret;
1021 }
1022
1023 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1024 {
1025         int ret;
1026
1027         switch (attr->group) {
1028         case KVM_S390_VM_MEM_CTRL:
1029                 ret = kvm_s390_get_mem_control(kvm, attr);
1030                 break;
1031         case KVM_S390_VM_TOD:
1032                 ret = kvm_s390_get_tod(kvm, attr);
1033                 break;
1034         case KVM_S390_VM_CPU_MODEL:
1035                 ret = kvm_s390_get_cpu_model(kvm, attr);
1036                 break;
1037         default:
1038                 ret = -ENXIO;
1039                 break;
1040         }
1041
1042         return ret;
1043 }
1044
1045 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1046 {
1047         int ret;
1048
1049         switch (attr->group) {
1050         case KVM_S390_VM_MEM_CTRL:
1051                 switch (attr->attr) {
1052                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1053                 case KVM_S390_VM_MEM_CLR_CMMA:
1054                         ret = sclp.has_cmma ? 0 : -ENXIO;
1055                         break;
1056                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1057                         ret = 0;
1058                         break;
1059                 default:
1060                         ret = -ENXIO;
1061                         break;
1062                 }
1063                 break;
1064         case KVM_S390_VM_TOD:
1065                 switch (attr->attr) {
1066                 case KVM_S390_VM_TOD_LOW:
1067                 case KVM_S390_VM_TOD_HIGH:
1068                         ret = 0;
1069                         break;
1070                 default:
1071                         ret = -ENXIO;
1072                         break;
1073                 }
1074                 break;
1075         case KVM_S390_VM_CPU_MODEL:
1076                 switch (attr->attr) {
1077                 case KVM_S390_VM_CPU_PROCESSOR:
1078                 case KVM_S390_VM_CPU_MACHINE:
1079                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1080                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1081                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1082                         ret = 0;
1083                         break;
1084                 /* configuring subfunctions is not supported yet */
1085                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1086                 default:
1087                         ret = -ENXIO;
1088                         break;
1089                 }
1090                 break;
1091         case KVM_S390_VM_CRYPTO:
1092                 switch (attr->attr) {
1093                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1094                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1095                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1096                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1097                         ret = 0;
1098                         break;
1099                 default:
1100                         ret = -ENXIO;
1101                         break;
1102                 }
1103                 break;
1104         default:
1105                 ret = -ENXIO;
1106                 break;
1107         }
1108
1109         return ret;
1110 }
1111
1112 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1113 {
1114         uint8_t *keys;
1115         uint64_t hva;
1116         int i, r = 0;
1117
1118         if (args->flags != 0)
1119                 return -EINVAL;
1120
1121         /* Is this guest using storage keys? */
1122         if (!mm_use_skey(current->mm))
1123                 return KVM_S390_GET_SKEYS_NONE;
1124
1125         /* Enforce sane limit on memory allocation */
1126         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1127                 return -EINVAL;
1128
1129         keys = kmalloc_array(args->count, sizeof(uint8_t),
1130                              GFP_KERNEL | __GFP_NOWARN);
1131         if (!keys)
1132                 keys = vmalloc(sizeof(uint8_t) * args->count);
1133         if (!keys)
1134                 return -ENOMEM;
1135
1136         down_read(&current->mm->mmap_sem);
1137         for (i = 0; i < args->count; i++) {
1138                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1139                 if (kvm_is_error_hva(hva)) {
1140                         r = -EFAULT;
1141                         break;
1142                 }
1143
1144                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1145                 if (r)
1146                         break;
1147         }
1148         up_read(&current->mm->mmap_sem);
1149
1150         if (!r) {
1151                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1152                                  sizeof(uint8_t) * args->count);
1153                 if (r)
1154                         r = -EFAULT;
1155         }
1156
1157         kvfree(keys);
1158         return r;
1159 }
1160
1161 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1162 {
1163         uint8_t *keys;
1164         uint64_t hva;
1165         int i, r = 0;
1166
1167         if (args->flags != 0)
1168                 return -EINVAL;
1169
1170         /* Enforce sane limit on memory allocation */
1171         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1172                 return -EINVAL;
1173
1174         keys = kmalloc_array(args->count, sizeof(uint8_t),
1175                              GFP_KERNEL | __GFP_NOWARN);
1176         if (!keys)
1177                 keys = vmalloc(sizeof(uint8_t) * args->count);
1178         if (!keys)
1179                 return -ENOMEM;
1180
1181         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1182                            sizeof(uint8_t) * args->count);
1183         if (r) {
1184                 r = -EFAULT;
1185                 goto out;
1186         }
1187
1188         /* Enable storage key handling for the guest */
1189         r = s390_enable_skey();
1190         if (r)
1191                 goto out;
1192
1193         down_read(&current->mm->mmap_sem);
1194         for (i = 0; i < args->count; i++) {
1195                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1196                 if (kvm_is_error_hva(hva)) {
1197                         r = -EFAULT;
1198                         break;
1199                 }
1200
1201                 /* Lowest order bit is reserved */
1202                 if (keys[i] & 0x01) {
1203                         r = -EINVAL;
1204                         break;
1205                 }
1206
1207                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1208                 if (r)
1209                         break;
1210         }
1211         up_read(&current->mm->mmap_sem);
1212 out:
1213         kvfree(keys);
1214         return r;
1215 }
1216
1217 long kvm_arch_vm_ioctl(struct file *filp,
1218                        unsigned int ioctl, unsigned long arg)
1219 {
1220         struct kvm *kvm = filp->private_data;
1221         void __user *argp = (void __user *)arg;
1222         struct kvm_device_attr attr;
1223         int r;
1224
1225         switch (ioctl) {
1226         case KVM_S390_INTERRUPT: {
1227                 struct kvm_s390_interrupt s390int;
1228
1229                 r = -EFAULT;
1230                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1231                         break;
1232                 r = kvm_s390_inject_vm(kvm, &s390int);
1233                 break;
1234         }
1235         case KVM_ENABLE_CAP: {
1236                 struct kvm_enable_cap cap;
1237                 r = -EFAULT;
1238                 if (copy_from_user(&cap, argp, sizeof(cap)))
1239                         break;
1240                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1241                 break;
1242         }
1243         case KVM_CREATE_IRQCHIP: {
1244                 struct kvm_irq_routing_entry routing;
1245
1246                 r = -EINVAL;
1247                 if (kvm->arch.use_irqchip) {
1248                         /* Set up dummy routing. */
1249                         memset(&routing, 0, sizeof(routing));
1250                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1251                 }
1252                 break;
1253         }
1254         case KVM_SET_DEVICE_ATTR: {
1255                 r = -EFAULT;
1256                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1257                         break;
1258                 r = kvm_s390_vm_set_attr(kvm, &attr);
1259                 break;
1260         }
1261         case KVM_GET_DEVICE_ATTR: {
1262                 r = -EFAULT;
1263                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1264                         break;
1265                 r = kvm_s390_vm_get_attr(kvm, &attr);
1266                 break;
1267         }
1268         case KVM_HAS_DEVICE_ATTR: {
1269                 r = -EFAULT;
1270                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1271                         break;
1272                 r = kvm_s390_vm_has_attr(kvm, &attr);
1273                 break;
1274         }
1275         case KVM_S390_GET_SKEYS: {
1276                 struct kvm_s390_skeys args;
1277
1278                 r = -EFAULT;
1279                 if (copy_from_user(&args, argp,
1280                                    sizeof(struct kvm_s390_skeys)))
1281                         break;
1282                 r = kvm_s390_get_skeys(kvm, &args);
1283                 break;
1284         }
1285         case KVM_S390_SET_SKEYS: {
1286                 struct kvm_s390_skeys args;
1287
1288                 r = -EFAULT;
1289                 if (copy_from_user(&args, argp,
1290                                    sizeof(struct kvm_s390_skeys)))
1291                         break;
1292                 r = kvm_s390_set_skeys(kvm, &args);
1293                 break;
1294         }
1295         default:
1296                 r = -ENOTTY;
1297         }
1298
1299         return r;
1300 }
1301
/*
 * Issue PQAP(QCI) and store the result in @config.
 *
 * @config: caller-provided buffer; the first 128 bytes are zeroed before
 *          the instruction is issued, so it must be at least that large.
 *
 * Returns the condition code extracted after the instruction (cc starts
 * out as 0).
 */
static int kvm_s390_query_ap_config(u8 *config)
{
        u32 fcn_code = 0x04000000UL;
        u32 cc = 0;

        memset(config, 0, 128);
        asm volatile(
                /* function code goes into GR0, buffer address into GR2 */
                "lgr 0,%1\n"
                "lgr 2,%2\n"
                ".long 0xb2af0000\n"            /* PQAP(QCI) */
                /* fetch the condition code into the low bits of cc */
                "0: ipm %0\n"
                "srl %0,28\n"
                "1:\n"
                /*
                 * NOTE(review): exception-table entry mapping label 0 to
                 * label 1; presumably a fault on PQAP skips the cc
                 * extraction and leaves cc at its initial 0 - confirm.
                 */
                EX_TABLE(0b, 1b)
                : "+r" (cc)
                : "r" (fcn_code), "r" (config)
                : "cc", "0", "2", "memory"
        );

        return cc;
}
1323
1324 static int kvm_s390_apxa_installed(void)
1325 {
1326         u8 config[128];
1327         int cc;
1328
1329         if (test_facility(12)) {
1330                 cc = kvm_s390_query_ap_config(config);
1331
1332                 if (cc)
1333                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1334                 else
1335                         return config[0] & 0x40;
1336         }
1337
1338         return 0;
1339 }
1340
1341 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1342 {
1343         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1344
1345         if (kvm_s390_apxa_installed())
1346                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1347         else
1348                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1349 }
1350
1351 static u64 kvm_s390_get_initial_cpuid(void)
1352 {
1353         struct cpuid cpuid;
1354
1355         get_cpu_id(&cpuid);
1356         cpuid.version = 0xff;
1357         return *((u64 *) &cpuid);
1358 }
1359
1360 static void kvm_s390_crypto_init(struct kvm *kvm)
1361 {
1362         if (!test_kvm_facility(kvm, 76))
1363                 return;
1364
1365         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1366         kvm_s390_set_crycb_format(kvm);
1367
1368         /* Enable AES/DEA protected key functions by default */
1369         kvm->arch.crypto.aes_kw = 1;
1370         kvm->arch.crypto.dea_kw = 1;
1371         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1372                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1373         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1374                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1375 }
1376
1377 static void sca_dispose(struct kvm *kvm)
1378 {
1379         if (kvm->arch.use_esca)
1380                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1381         else
1382                 free_page((unsigned long)(kvm->arch.sca));
1383         kvm->arch.sca = NULL;
1384 }
1385
/*
 * Architecture-specific part of VM creation.
 *
 * @kvm:  the VM being created
 * @type: VM type; only KVM_VM_S390_UCONTROL (with CONFIG_KVM_S390_UCONTROL
 *        and CAP_SYS_ADMIN) or 0 is accepted
 *
 * Allocates the basic SCA, the debug feature and sie_page2, initializes
 * the facility mask/list, cpuid, IBC, crypto state, floating interrupt
 * lists and, for non-ucontrol VMs, the guest address space (gmap).
 *
 * Returns 0 on success, -EINVAL for an unsupported type, the error from
 * s390_enable_sie(), or -ENOMEM if any allocation fails. On failure the
 * resources acquired so far are released via the common out_err path.
 */
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
        gfp_t alloc_flags = GFP_KERNEL;
        int i, rc;
        char debug_name[16];
        /* shared by all VMs; updated under kvm_lock below */
        static unsigned long sca_offset;

        rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
        if (type & ~KVM_VM_S390_UCONTROL)
                goto out_err;
        if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
                goto out_err;
#else
        if (type)
                goto out_err;
#endif

        rc = s390_enable_sie();
        if (rc)
                goto out_err;

        /* every failure from here on is an allocation failure */
        rc = -ENOMEM;

        ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);

        kvm->arch.use_esca = 0; /* start with basic SCA */
        /* without 64-bit SCA origin support, allocate from the DMA zone */
        if (!sclp.has_64bscao)
                alloc_flags |= GFP_DMA;
        rwlock_init(&kvm->arch.sca_lock);
        kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
        if (!kvm->arch.sca)
                goto out_err;
        /*
         * Place the SCA at a per-VM offset inside its page: the offset
         * advances by 16 bytes per created VM and wraps to 0 once the
         * block would no longer fit into the page.
         */
        spin_lock(&kvm_lock);
        sca_offset += 16;
        if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
                sca_offset = 0;
        kvm->arch.sca = (struct bsca_block *)
                        ((char *) kvm->arch.sca + sca_offset);
        spin_unlock(&kvm_lock);

        /* debug feature is named after the creating process */
        sprintf(debug_name, "kvm-%u", current->pid);

        kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
        if (!kvm->arch.dbf)
                goto out_err;

        kvm->arch.sie_page2 =
             (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
        if (!kvm->arch.sie_page2)
                goto out_err;

        /* Populate the facility mask initially. */
        memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        /* restrict host facilities to those in kvm_s390_fac_list_mask */
        for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
                if (i < kvm_s390_fac_list_mask_size())
                        kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
                else
                        kvm->arch.model.fac_mask[i] = 0UL;
        }

        /* Populate the facility list initially. */
        kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
        memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
               S390_ARCH_FAC_LIST_SIZE_BYTE);

        /* facility 74 is set unconditionally in both mask and list */
        set_kvm_facility(kvm->arch.model.fac_mask, 74);
        set_kvm_facility(kvm->arch.model.fac_list, 74);

        kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
        kvm->arch.model.ibc = sclp.ibc & 0x0fff;

        kvm_s390_crypto_init(kvm);

        spin_lock_init(&kvm->arch.float_int.lock);
        for (i = 0; i < FIRQ_LIST_COUNT; i++)
                INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
        init_waitqueue_head(&kvm->arch.ipte_wq);
        mutex_init(&kvm->arch.ipte_mutex);

        debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
        VM_EVENT(kvm, 3, "vm created with type %lu", type);

        if (type & KVM_VM_S390_UCONTROL) {
                /* ucontrol VMs get no VM-wide gmap and no memory limit */
                kvm->arch.gmap = NULL;
                kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
        } else {
                /* sclp.hamax + 1 would wrap for U64_MAX, so special-case it */
                if (sclp.hamax == U64_MAX)
                        kvm->arch.mem_limit = TASK_MAX_SIZE;
                else
                        kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
                                                    sclp.hamax + 1);
                kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
                if (!kvm->arch.gmap)
                        goto out_err;
                kvm->arch.gmap->private = kvm;
                kvm->arch.gmap->pfault_enabled = 0;
        }

        kvm->arch.css_support = 0;
        kvm->arch.use_irqchip = 0;
        kvm->arch.epoch = 0;

        spin_lock_init(&kvm->arch.start_stop_lock);
        kvm_s390_vsie_init(kvm);
        KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

        return 0;
out_err:
        /*
         * Common cleanup for all failure points.
         * NOTE(review): relies on members not yet set up being NULL/zero
         * (i.e. on *kvm arriving zero-initialized) - confirm with caller.
         */
        free_page((unsigned long)kvm->arch.sie_page2);
        debug_unregister(kvm->arch.dbf);
        sca_dispose(kvm);
        KVM_EVENT(3, "creation of vm failed: %d", rc);
        return rc;
}
1502
/* s390 provides no per-VCPU debugfs entries: always returns false. */
bool kvm_arch_has_vcpu_debugfs(void)
{
        return false;
}
1507
/* Nothing to create for a VCPU on s390: does nothing and returns 0. */
int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
        return 0;
}
1512
1513 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1514 {
1515         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1516         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1517         kvm_s390_clear_local_irqs(vcpu);
1518         kvm_clear_async_pf_completion_queue(vcpu);
1519         if (!kvm_is_ucontrol(vcpu->kvm))
1520                 sca_del_vcpu(vcpu);
1521
1522         if (kvm_is_ucontrol(vcpu->kvm))
1523                 gmap_remove(vcpu->arch.gmap);
1524
1525         if (vcpu->kvm->arch.use_cmma)
1526                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1527         free_page((unsigned long)(vcpu->arch.sie_block));
1528
1529         kvm_vcpu_uninit(vcpu);
1530         kmem_cache_free(kvm_vcpu_cache, vcpu);
1531 }
1532
1533 static void kvm_free_vcpus(struct kvm *kvm)
1534 {
1535         unsigned int i;
1536         struct kvm_vcpu *vcpu;
1537
1538         kvm_for_each_vcpu(i, vcpu, kvm)
1539                 kvm_arch_vcpu_destroy(vcpu);
1540
1541         mutex_lock(&kvm->lock);
1542         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1543                 kvm->vcpus[i] = NULL;
1544
1545         atomic_set(&kvm->online_vcpus, 0);
1546         mutex_unlock(&kvm->lock);
1547 }
1548
/*
 * Architecture-specific part of VM destruction.
 *
 * Frees all VCPUs first, then releases the SCA, the debug feature,
 * sie_page2 and (for non-ucontrol VMs) the VM-wide gmap, and finally
 * tears down adapters, floating interrupts and the vsie state.
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
        /* VCPUs go first; non-ucontrol VCPUs still touch the SCA here */
        kvm_free_vcpus(kvm);
        sca_dispose(kvm);
        debug_unregister(kvm->arch.dbf);
        free_page((unsigned long)kvm->arch.sie_page2);
        /* ucontrol VMs have no VM-wide gmap (set to NULL at init) */
        if (!kvm_is_ucontrol(kvm))
                gmap_remove(kvm->arch.gmap);
        kvm_s390_destroy_adapters(kvm);
        kvm_s390_clear_float_irqs(kvm);
        kvm_s390_vsie_destroy(kvm);
        KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
1562
1563 /* Section: vcpu related */
1564 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1565 {
1566         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1567         if (!vcpu->arch.gmap)
1568                 return -ENOMEM;
1569         vcpu->arch.gmap->private = vcpu->kvm;
1570
1571         return 0;
1572 }
1573
1574 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1575 {
1576         if (!kvm_s390_use_sca_entries())
1577                 return;
1578         read_lock(&vcpu->kvm->arch.sca_lock);
1579         if (vcpu->kvm->arch.use_esca) {
1580                 struct esca_block *sca = vcpu->kvm->arch.sca;
1581
1582                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1583                 sca->cpu[vcpu->vcpu_id].sda = 0;
1584         } else {
1585                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1586
1587                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1588                 sca->cpu[vcpu->vcpu_id].sda = 0;
1589         }
1590         read_unlock(&vcpu->kvm->arch.sca_lock);
1591 }
1592
1593 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1594 {
1595         if (!kvm_s390_use_sca_entries()) {
1596                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1597
1598                 /* we still need the basic sca for the ipte control */
1599                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1600                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1601         }
1602         read_lock(&vcpu->kvm->arch.sca_lock);
1603         if (vcpu->kvm->arch.use_esca) {
1604                 struct esca_block *sca = vcpu->kvm->arch.sca;
1605
1606                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1607                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1608                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1609                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1610                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1611         } else {
1612                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1613
1614                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1615                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1616                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1617                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1618         }
1619         read_unlock(&vcpu->kvm->arch.sca_lock);
1620 }
1621
1622 /* Basic SCA to Extended SCA data copy routines */
/*
 * Copy one basic SCA entry into an extended SCA entry: the SIE block
 * address (sda) and the c and scn fields of sigp_ctrl.
 */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
        d->sda = s->sda;
        d->sigp_ctrl.c = s->sigp_ctrl.c;
        d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}
1629
1630 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1631 {
1632         int i;
1633
1634         d->ipte_control = s->ipte_control;
1635         d->mcn[0] = s->mcn;
1636         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1637                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1638 }
1639
/*
 * Replace the VM's basic SCA with a newly allocated extended SCA.
 *
 * All VCPUs are blocked and the sca_lock is taken for writing while the
 * old contents are copied, every VCPU's SIE block is pointed at the new
 * SCA (with ecb2 bit 0x04 set) and kvm->arch.sca / use_esca are switched
 * over. The old SCA page is freed afterwards.
 *
 * Returns 0 on success, -ENOMEM if the extended SCA cannot be allocated.
 */
static int sca_switch_to_extended(struct kvm *kvm)
{
        struct bsca_block *old_sca = kvm->arch.sca;
        struct esca_block *new_sca;
        struct kvm_vcpu *vcpu;
        unsigned int vcpu_idx;
        u32 scaol, scaoh;

        new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
        if (!new_sca)
                return -ENOMEM;

        /* split the new origin into high/low words; low 6 bits masked off */
        scaoh = (u32)((u64)(new_sca) >> 32);
        scaol = (u32)(u64)(new_sca) & ~0x3fU;

        kvm_s390_vcpu_block_all(kvm);
        write_lock(&kvm->arch.sca_lock);

        sca_copy_b_to_e(new_sca, old_sca);

        kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
                vcpu->arch.sie_block->scaoh = scaoh;
                vcpu->arch.sie_block->scaol = scaol;
                vcpu->arch.sie_block->ecb2 |= 0x04U;
        }
        kvm->arch.sca = new_sca;
        kvm->arch.use_esca = 1;

        write_unlock(&kvm->arch.sca_lock);
        kvm_s390_vcpu_unblock_all(kvm);

        free_page((unsigned long)old_sca);

        /* old_sca is only printed as a pointer value here, not dereferenced */
        VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
                 old_sca, kvm->arch.sca);
        return 0;
}
1677
1678 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1679 {
1680         int rc;
1681
1682         if (!kvm_s390_use_sca_entries()) {
1683                 if (id < KVM_MAX_VCPUS)
1684                         return true;
1685                 return false;
1686         }
1687         if (id < KVM_S390_BSCA_CPU_SLOTS)
1688                 return true;
1689         if (!sclp.has_esca || !sclp.has_64bscao)
1690                 return false;
1691
1692         mutex_lock(&kvm->lock);
1693         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1694         mutex_unlock(&kvm->lock);
1695
1696         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1697 }
1698
1699 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1700 {
1701         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1702         kvm_clear_async_pf_completion_queue(vcpu);
1703         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1704                                     KVM_SYNC_GPRS |
1705                                     KVM_SYNC_ACRS |
1706                                     KVM_SYNC_CRS |
1707                                     KVM_SYNC_ARCH0 |
1708                                     KVM_SYNC_PFAULT;
1709         kvm_s390_set_prefix(vcpu, 0);
1710         if (test_kvm_facility(vcpu->kvm, 64))
1711                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1712         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1713          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1714          */
1715         if (MACHINE_HAS_VX)
1716                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1717         else
1718                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1719
1720         if (kvm_is_ucontrol(vcpu->kvm))
1721                 return __kvm_ucontrol_vcpu_init(vcpu);
1722
1723         return 0;
1724 }
1725
1726 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1727 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1728 {
1729         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1730         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1731         vcpu->arch.cputm_start = get_tod_clock_fast();
1732         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1733 }
1734
1735 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1736 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1737 {
1738         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1739         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1740         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1741         vcpu->arch.cputm_start = 0;
1742         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1743 }
1744
1745 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1746 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1747 {
1748         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1749         vcpu->arch.cputm_enabled = true;
1750         __start_cpu_timer_accounting(vcpu);
1751 }
1752
1753 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1754 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1755 {
1756         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1757         __stop_cpu_timer_accounting(vcpu);
1758         vcpu->arch.cputm_enabled = false;
1759 }
1760
/*
 * Preemption-safe wrapper around __enable_cpu_timer_accounting().
 */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
1767
/*
 * Preemption-safe wrapper around __disable_cpu_timer_accounting().
 */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
1774
1775 /* set the cpu timer - may only be called from the VCPU thread itself */
1776 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1777 {
1778         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1779         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1780         if (vcpu->arch.cputm_enabled)
1781                 vcpu->arch.cputm_start = get_tod_clock_fast();
1782         vcpu->arch.sie_block->cputm = cputm;
1783         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1784         preempt_enable();
1785 }
1786
1787 /* update and get the cpu timer - can also be called from other VCPU threads */
1788 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1789 {
1790         unsigned int seq;
1791         __u64 value;
1792
1793         if (unlikely(!vcpu->arch.cputm_enabled))
1794                 return vcpu->arch.sie_block->cputm;
1795
1796         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1797         do {
1798                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1799                 /*
1800                  * If the writer would ever execute a read in the critical
1801                  * section, e.g. in irq context, we have a deadlock.
1802                  */
1803                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1804                 value = vcpu->arch.sie_block->cputm;
1805                 /* if cputm_start is 0, accounting is being started/stopped */
1806                 if (likely(vcpu->arch.cputm_start))
1807                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1808         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1809         preempt_enable();
1810         return value;
1811 }
1812
1813 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1814 {
1815         /* Save host register state */
1816         save_fpu_regs();
1817         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1818         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1819
1820         if (MACHINE_HAS_VX)
1821                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1822         else
1823                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1824         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1825         if (test_fp_ctl(current->thread.fpu.fpc))
1826                 /* User space provided an invalid FPC, let's clear it */
1827                 current->thread.fpu.fpc = 0;
1828
1829         gmap_enable(vcpu->arch.enabled_gmap);
1830         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1831         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1832                 __start_cpu_timer_accounting(vcpu);
1833         vcpu->cpu = cpu;
1834 }
1835
1836 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1837 {
1838         vcpu->cpu = -1;
1839         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1840                 __stop_cpu_timer_accounting(vcpu);
1841         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1842         vcpu->arch.enabled_gmap = gmap_get_enabled();
1843         gmap_disable(vcpu->arch.enabled_gmap);
1844
1845         /* Save guest register state */
1846         save_fpu_regs();
1847         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1848
1849         /* Restore host register state */
1850         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1851         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1852 }
1853
1854 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1855 {
1856         /* this equals initial cpu reset in pop, but we don't switch to ESA */
1857         vcpu->arch.sie_block->gpsw.mask = 0UL;
1858         vcpu->arch.sie_block->gpsw.addr = 0UL;
1859         kvm_s390_set_prefix(vcpu, 0);
1860         kvm_s390_set_cpu_timer(vcpu, 0);
1861         vcpu->arch.sie_block->ckc       = 0UL;
1862         vcpu->arch.sie_block->todpr     = 0;
1863         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1864         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1865         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1866         /* make sure the new fpc will be lazily loaded */
1867         save_fpu_regs();
1868         current->thread.fpu.fpc = 0;
1869         vcpu->arch.sie_block->gbea = 1;
1870         vcpu->arch.sie_block->pp = 0;
1871         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1872         kvm_clear_async_pf_completion_queue(vcpu);
1873         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1874                 kvm_s390_vcpu_stop(vcpu);
1875         kvm_s390_clear_local_irqs(vcpu);
1876 }
1877
1878 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1879 {
1880         mutex_lock(&vcpu->kvm->lock);
1881         preempt_disable();
1882         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1883         preempt_enable();
1884         mutex_unlock(&vcpu->kvm->lock);
1885         if (!kvm_is_ucontrol(vcpu->kvm)) {
1886                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1887                 sca_add_vcpu(vcpu);
1888         }
1889         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1890                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1891         /* make vcpu_load load the right gmap on the first trigger */
1892         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1893 }
1894
1895 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1896 {
1897         if (!test_kvm_facility(vcpu->kvm, 76))
1898                 return;
1899
1900         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1901
1902         if (vcpu->kvm->arch.crypto.aes_kw)
1903                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1904         if (vcpu->kvm->arch.crypto.dea_kw)
1905                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1906
1907         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1908 }
1909
1910 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1911 {
1912         free_page(vcpu->arch.sie_block->cbrlo);
1913         vcpu->arch.sie_block->cbrlo = 0;
1914 }
1915
1916 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1917 {
1918         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1919         if (!vcpu->arch.sie_block->cbrlo)
1920                 return -ENOMEM;
1921
1922         vcpu->arch.sie_block->ecb2 |= 0x80;
1923         vcpu->arch.sie_block->ecb2 &= ~0x08;
1924         return 0;
1925 }
1926
1927 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1928 {
1929         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1930
1931         vcpu->arch.sie_block->ibc = model->ibc;
1932         if (test_kvm_facility(vcpu->kvm, 7))
1933                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1934 }
1935
1936 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1937 {
1938         int rc = 0;
1939
1940         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1941                                                     CPUSTAT_SM |
1942                                                     CPUSTAT_STOPPED);
1943
1944         if (test_kvm_facility(vcpu->kvm, 78))
1945                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1946         else if (test_kvm_facility(vcpu->kvm, 8))
1947                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1948
1949         kvm_s390_vcpu_setup_model(vcpu);
1950
1951         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1952         if (MACHINE_HAS_ESOP)
1953                 vcpu->arch.sie_block->ecb |= 0x02;
1954         if (test_kvm_facility(vcpu->kvm, 9))
1955                 vcpu->arch.sie_block->ecb |= 0x04;
1956         if (test_kvm_facility(vcpu->kvm, 73))
1957                 vcpu->arch.sie_block->ecb |= 0x10;
1958
1959         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1960                 vcpu->arch.sie_block->ecb2 |= 0x08;
1961         vcpu->arch.sie_block->eca = 0x1002000U;
1962         if (sclp.has_cei)
1963                 vcpu->arch.sie_block->eca |= 0x80000000U;
1964         if (sclp.has_ib)
1965                 vcpu->arch.sie_block->eca |= 0x40000000U;
1966         if (sclp.has_siif)
1967                 vcpu->arch.sie_block->eca |= 1;
1968         if (sclp.has_sigpif)
1969                 vcpu->arch.sie_block->eca |= 0x10000000U;
1970         if (test_kvm_facility(vcpu->kvm, 129)) {
1971                 vcpu->arch.sie_block->eca |= 0x00020000;
1972                 vcpu->arch.sie_block->ecd |= 0x20000000;
1973         }
1974         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1975         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1976
1977         if (vcpu->kvm->arch.use_cmma) {
1978                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1979                 if (rc)
1980                         return rc;
1981         }
1982         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1983         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1984
1985         kvm_s390_vcpu_crypto_setup(vcpu);
1986
1987         return rc;
1988 }
1989
1990 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1991                                       unsigned int id)
1992 {
1993         struct kvm_vcpu *vcpu;
1994         struct sie_page *sie_page;
1995         int rc = -EINVAL;
1996
1997         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1998                 goto out;
1999
2000         rc = -ENOMEM;
2001
2002         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2003         if (!vcpu)
2004                 goto out;
2005
2006         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2007         if (!sie_page)
2008                 goto out_free_cpu;
2009
2010         vcpu->arch.sie_block = &sie_page->sie_block;
2011         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2012
2013         /* the real guest size will always be smaller than msl */
2014         vcpu->arch.sie_block->mso = 0;
2015         vcpu->arch.sie_block->msl = sclp.hamax;
2016
2017         vcpu->arch.sie_block->icpua = id;
2018         spin_lock_init(&vcpu->arch.local_int.lock);
2019         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2020         vcpu->arch.local_int.wq = &vcpu->wq;
2021         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2022         seqcount_init(&vcpu->arch.cputm_seqcount);
2023
2024         rc = kvm_vcpu_init(vcpu, kvm, id);
2025         if (rc)
2026                 goto out_free_sie_block;
2027         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2028                  vcpu->arch.sie_block);
2029         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2030
2031         return vcpu;
2032 out_free_sie_block:
2033         free_page((unsigned long)(vcpu->arch.sie_block));
2034 out_free_cpu:
2035         kmem_cache_free(kvm_vcpu_cache, vcpu);
2036 out:
2037         return ERR_PTR(rc);
2038 }
2039
/*
 * Report whether @vcpu is runnable, i.e. whether kvm_s390_vcpu_has_irq()
 * (called with 0 as second argument) reports a pending interrupt.
 */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}
2044
2045 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2046 {
2047         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2048         exit_sie(vcpu);
2049 }
2050
2051 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2052 {
2053         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2054 }
2055
2056 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2057 {
2058         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2059         exit_sie(vcpu);
2060 }
2061
2062 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2063 {
2064         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2065 }
2066
2067 /*
2068  * Kick a guest cpu out of SIE and wait until SIE is not running.
2069  * If the CPU is not running (e.g. waiting as idle) the function will
2070  * return immediately. */
2071 void exit_sie(struct kvm_vcpu *vcpu)
2072 {
2073         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2074         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2075                 cpu_relax();
2076 }
2077
/*
 * Kick a guest cpu out of SIE to process a request synchronously
 *
 * The request @req is queued for @vcpu first, then the vcpu is forced
 * out of SIE through kvm_s390_vcpu_request().
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
2084
2085 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2086                               unsigned long end)
2087 {
2088         struct kvm *kvm = gmap->private;
2089         struct kvm_vcpu *vcpu;
2090         unsigned long prefix;
2091         int i;
2092
2093         if (gmap_is_shadow(gmap))
2094                 return;
2095         if (start >= 1UL << 31)
2096                 /* We are only interested in prefix pages */
2097                 return;
2098         kvm_for_each_vcpu(i, vcpu, kvm) {
2099                 /* match against both prefix pages */
2100                 prefix = kvm_s390_get_prefix(vcpu);
2101                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2102                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2103                                    start, end);
2104                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2105                 }
2106         }
2107 }
2108
/*
 * Must never be reached on s390; hitting it is a BUG().
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();

	return 0;
}
2115
2116 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2117                                            struct kvm_one_reg *reg)
2118 {
2119         int r = -EINVAL;
2120
2121         switch (reg->id) {
2122         case KVM_REG_S390_TODPR:
2123                 r = put_user(vcpu->arch.sie_block->todpr,
2124                              (u32 __user *)reg->addr);
2125                 break;
2126         case KVM_REG_S390_EPOCHDIFF:
2127                 r = put_user(vcpu->arch.sie_block->epoch,
2128                              (u64 __user *)reg->addr);
2129                 break;
2130         case KVM_REG_S390_CPU_TIMER:
2131                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2132                              (u64 __user *)reg->addr);
2133                 break;
2134         case KVM_REG_S390_CLOCK_COMP:
2135                 r = put_user(vcpu->arch.sie_block->ckc,
2136                              (u64 __user *)reg->addr);
2137                 break;
2138         case KVM_REG_S390_PFTOKEN:
2139                 r = put_user(vcpu->arch.pfault_token,
2140                              (u64 __user *)reg->addr);
2141                 break;
2142         case KVM_REG_S390_PFCOMPARE:
2143                 r = put_user(vcpu->arch.pfault_compare,
2144                              (u64 __user *)reg->addr);
2145                 break;
2146         case KVM_REG_S390_PFSELECT:
2147                 r = put_user(vcpu->arch.pfault_select,
2148                              (u64 __user *)reg->addr);
2149                 break;
2150         case KVM_REG_S390_PP:
2151                 r = put_user(vcpu->arch.sie_block->pp,
2152                              (u64 __user *)reg->addr);
2153                 break;
2154         case KVM_REG_S390_GBEA:
2155                 r = put_user(vcpu->arch.sie_block->gbea,
2156                              (u64 __user *)reg->addr);
2157                 break;
2158         default:
2159                 break;
2160         }
2161
2162         return r;
2163 }
2164
2165 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2166                                            struct kvm_one_reg *reg)
2167 {
2168         int r = -EINVAL;
2169         __u64 val;
2170
2171         switch (reg->id) {
2172         case KVM_REG_S390_TODPR:
2173                 r = get_user(vcpu->arch.sie_block->todpr,
2174                              (u32 __user *)reg->addr);
2175                 break;
2176         case KVM_REG_S390_EPOCHDIFF:
2177                 r = get_user(vcpu->arch.sie_block->epoch,
2178                              (u64 __user *)reg->addr);
2179                 break;
2180         case KVM_REG_S390_CPU_TIMER:
2181                 r = get_user(val, (u64 __user *)reg->addr);
2182                 if (!r)
2183                         kvm_s390_set_cpu_timer(vcpu, val);
2184                 break;
2185         case KVM_REG_S390_CLOCK_COMP:
2186                 r = get_user(vcpu->arch.sie_block->ckc,
2187                              (u64 __user *)reg->addr);
2188                 break;
2189         case KVM_REG_S390_PFTOKEN:
2190                 r = get_user(vcpu->arch.pfault_token,
2191                              (u64 __user *)reg->addr);
2192                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2193                         kvm_clear_async_pf_completion_queue(vcpu);
2194                 break;
2195         case KVM_REG_S390_PFCOMPARE:
2196                 r = get_user(vcpu->arch.pfault_compare,
2197                              (u64 __user *)reg->addr);
2198                 break;
2199         case KVM_REG_S390_PFSELECT:
2200                 r = get_user(vcpu->arch.pfault_select,
2201                              (u64 __user *)reg->addr);
2202                 break;
2203         case KVM_REG_S390_PP:
2204                 r = get_user(vcpu->arch.sie_block->pp,
2205                              (u64 __user *)reg->addr);
2206                 break;
2207         case KVM_REG_S390_GBEA:
2208                 r = get_user(vcpu->arch.sie_block->gbea,
2209                              (u64 __user *)reg->addr);
2210                 break;
2211         default:
2212                 break;
2213         }
2214
2215         return r;
2216 }
2217
/*
 * ioctl wrapper around kvm_s390_vcpu_initial_reset(); always returns 0.
 */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);

	return 0;
}
2223
2224 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2225 {
2226         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2227         return 0;
2228 }
2229
2230 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2231 {
2232         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2233         return 0;
2234 }
2235
2236 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2237                                   struct kvm_sregs *sregs)
2238 {
2239         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2240         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2241         return 0;
2242 }
2243
2244 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2245                                   struct kvm_sregs *sregs)
2246 {
2247         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2248         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2249         return 0;
2250 }
2251
2252 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2253 {
2254         /* make sure the new values will be lazily loaded */
2255         save_fpu_regs();
2256         if (test_fp_ctl(fpu->fpc))
2257                 return -EINVAL;
2258         current->thread.fpu.fpc = fpu->fpc;
2259         if (MACHINE_HAS_VX)
2260                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2261                                  (freg_t *) fpu->fprs);
2262         else
2263                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2264         return 0;
2265 }
2266
2267 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2268 {
2269         /* make sure we have the latest values */
2270         save_fpu_regs();
2271         if (MACHINE_HAS_VX)
2272                 convert_vx_to_fp((freg_t *) fpu->fprs,
2273                                  (__vector128 *) vcpu->run->s.regs.vrs);
2274         else
2275                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2276         fpu->fpc = current->thread.fpu.fpc;
2277         return 0;
2278 }
2279
2280 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2281 {
2282         int rc = 0;
2283
2284         if (!is_vcpu_stopped(vcpu))
2285                 rc = -EBUSY;
2286         else {
2287                 vcpu->run->psw_mask = psw.mask;
2288                 vcpu->run->psw_addr = psw.addr;
2289         }
2290         return rc;
2291 }
2292
2293 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2294                                   struct kvm_translation *tr)
2295 {
2296         return -EINVAL; /* not implemented yet */
2297 }
2298
/* every bit that may be set in the control field of struct kvm_guest_debug */
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_ENABLE | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_SINGLESTEP)
2302
2303 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2304                                         struct kvm_guest_debug *dbg)
2305 {
2306         int rc = 0;
2307
2308         vcpu->guest_debug = 0;
2309         kvm_s390_clear_bp_data(vcpu);
2310
2311         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2312                 return -EINVAL;
2313         if (!sclp.has_gpere)
2314                 return -EINVAL;
2315
2316         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2317                 vcpu->guest_debug = dbg->control;
2318                 /* enforce guest PER */
2319                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2320
2321                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2322                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2323         } else {
2324                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2325                 vcpu->arch.guestdbg.last_bp = 0;
2326         }
2327
2328         if (rc) {
2329                 vcpu->guest_debug = 0;
2330                 kvm_s390_clear_bp_data(vcpu);
2331                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2332         }
2333
2334         return rc;
2335 }
2336
2337 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2338                                     struct kvm_mp_state *mp_state)
2339 {
2340         /* CHECK_STOP and LOAD are not supported yet */
2341         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2342                                        KVM_MP_STATE_OPERATING;
2343 }
2344
2345 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2346                                     struct kvm_mp_state *mp_state)
2347 {
2348         int rc = 0;
2349
2350         /* user space knows about this interface - let it control the state */
2351         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2352
2353         switch (mp_state->mp_state) {
2354         case KVM_MP_STATE_STOPPED:
2355                 kvm_s390_vcpu_stop(vcpu);
2356                 break;
2357         case KVM_MP_STATE_OPERATING:
2358                 kvm_s390_vcpu_start(vcpu);
2359                 break;
2360         case KVM_MP_STATE_LOAD:
2361         case KVM_MP_STATE_CHECK_STOP:
2362                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2363         default:
2364                 rc = -ENXIO;
2365         }
2366
2367         return rc;
2368 }
2369
2370 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2371 {
2372         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2373 }
2374
2375 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2376 {
2377 retry:
2378         kvm_s390_vcpu_request_handled(vcpu);
2379         if (!vcpu->requests)
2380                 return 0;
2381         /*
2382          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2383          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2384          * This ensures that the ipte instruction for this request has
2385          * already finished. We might race against a second unmapper that
2386          * wants to set the blocking bit. Lets just retry the request loop.
2387          */
2388         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2389                 int rc;
2390                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2391                                           kvm_s390_get_prefix(vcpu),
2392                                           PAGE_SIZE * 2, PROT_WRITE);
2393                 if (rc) {
2394                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2395                         return rc;
2396                 }
2397                 goto retry;
2398         }
2399
2400         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2401                 vcpu->arch.sie_block->ihcpu = 0xffff;
2402                 goto retry;
2403         }
2404
2405         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2406                 if (!ibs_enabled(vcpu)) {
2407                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2408                         atomic_or(CPUSTAT_IBS,
2409                                         &vcpu->arch.sie_block->cpuflags);
2410                 }
2411                 goto retry;
2412         }
2413
2414         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2415                 if (ibs_enabled(vcpu)) {
2416                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2417                         atomic_andnot(CPUSTAT_IBS,
2418                                           &vcpu->arch.sie_block->cpuflags);
2419                 }
2420                 goto retry;
2421         }
2422
2423         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2424                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2425                 goto retry;
2426         }
2427
2428         /* nothing to do, just clear the request */
2429         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2430
2431         return 0;
2432 }
2433
2434 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2435 {
2436         struct kvm_vcpu *vcpu;
2437         int i;
2438
2439         mutex_lock(&kvm->lock);
2440         preempt_disable();
2441         kvm->arch.epoch = tod - get_tod_clock();
2442         kvm_s390_vcpu_block_all(kvm);
2443         kvm_for_each_vcpu(i, vcpu, kvm)
2444                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2445         kvm_s390_vcpu_unblock_all(kvm);
2446         preempt_enable();
2447         mutex_unlock(&kvm->lock);
2448 }
2449
2450 /**
2451  * kvm_arch_fault_in_page - fault-in guest page if necessary
2452  * @vcpu: The corresponding virtual cpu
2453  * @gpa: Guest physical address
2454  * @writable: Whether the page should be writable or not
2455  *
2456  * Make sure that a guest page has been faulted-in on the host.
2457  *
2458  * Return: Zero on success, negative error code otherwise.
2459  */
2460 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2461 {
2462         return gmap_fault(vcpu->arch.gmap, gpa,
2463                           writable ? FAULT_FLAG_WRITE : 0);
2464 }
2465
2466 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2467                                       unsigned long token)
2468 {
2469         struct kvm_s390_interrupt inti;
2470         struct kvm_s390_irq irq;
2471
2472         if (start_token) {
2473                 irq.u.ext.ext_params2 = token;
2474                 irq.type = KVM_S390_INT_PFAULT_INIT;
2475                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2476         } else {
2477                 inti.type = KVM_S390_INT_PFAULT_DONE;
2478                 inti.parm64 = token;
2479                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2480         }
2481 }
2482
2483 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2484                                      struct kvm_async_pf *work)
2485 {
2486         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2487         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2488 }
2489
2490 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2491                                  struct kvm_async_pf *work)
2492 {
2493         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2494         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2495 }
2496
/*
 * Async-pf hook invoked when a page is ready.
 * Intentionally a no-op: s390 always injects the page directly.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
2502
2503 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2504 {
2505         /*
2506          * s390 will always inject the page directly,
2507          * but we still want check_async_completion to cleanup
2508          */
2509         return true;
2510 }
2511
2512 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2513 {
2514         hva_t hva;
2515         struct kvm_arch_async_pf arch;
2516         int rc;
2517
2518         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2519                 return 0;
2520         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2521             vcpu->arch.pfault_compare)
2522                 return 0;
2523         if (psw_extint_disabled(vcpu))
2524                 return 0;
2525         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2526                 return 0;
2527         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2528                 return 0;
2529         if (!vcpu->arch.gmap->pfault_enabled)
2530                 return 0;
2531
2532         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2533         hva += current->thread.gmap_addr & ~PAGE_MASK;
2534         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2535                 return 0;
2536
2537         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2538         return rc;
2539 }
2540
2541 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2542 {
2543         int rc, cpuflags;
2544
2545         /*
2546          * On s390 notifications for arriving pages will be delivered directly
2547          * to the guest but the house keeping for completed pfaults is
2548          * handled outside the worker.
2549          */
2550         kvm_check_async_pf_completion(vcpu);
2551
2552         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2553         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2554
2555         if (need_resched())
2556                 schedule();
2557
2558         if (test_cpu_flag(CIF_MCCK_PENDING))
2559                 s390_handle_mcck();
2560
2561         if (!kvm_is_ucontrol(vcpu->kvm)) {
2562                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2563                 if (rc)
2564                         return rc;
2565         }
2566
2567         rc = kvm_s390_handle_requests(vcpu);
2568         if (rc)
2569                 return rc;
2570
2571         if (guestdbg_enabled(vcpu)) {
2572                 kvm_s390_backup_guest_per_regs(vcpu);
2573                 kvm_s390_patch_guest_per_regs(vcpu);
2574         }
2575
2576         vcpu->arch.sie_block->icptcode = 0;
2577         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2578         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2579         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2580
2581         return 0;
2582 }
2583
2584 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2585 {
2586         struct kvm_s390_pgm_info pgm_info = {
2587                 .code = PGM_ADDRESSING,
2588         };
2589         u8 opcode, ilen;
2590         int rc;
2591
2592         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2593         trace_kvm_s390_sie_fault(vcpu);
2594
2595         /*
2596          * We want to inject an addressing exception, which is defined as a
2597          * suppressing or terminating exception. However, since we came here
2598          * by a DAT access exception, the PSW still points to the faulting
2599          * instruction since DAT exceptions are nullifying. So we've got
2600          * to look up the current opcode to get the length of the instruction
2601          * to be able to forward the PSW.
2602          */
2603         rc = read_guest_instr(vcpu, &opcode, 1);
2604         ilen = insn_length(opcode);
2605         if (rc < 0) {
2606                 return rc;
2607         } else if (rc) {
2608                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2609                  * Forward by arbitrary ilc, injection will take care of
2610                  * nullification if necessary.
2611                  */
2612                 pgm_info = vcpu->arch.pgm;
2613                 ilen = 4;
2614         }
2615         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2616         kvm_s390_forward_psw(vcpu, ilen);
2617         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2618 }
2619
2620 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2621 {
2622         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2623                    vcpu->arch.sie_block->icptcode);
2624         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2625
2626         if (guestdbg_enabled(vcpu))
2627                 kvm_s390_restore_guest_per_regs(vcpu);
2628
2629         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2630         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2631
2632         if (vcpu->arch.sie_block->icptcode > 0) {
2633                 int rc = kvm_handle_sie_intercept(vcpu);
2634
2635                 if (rc != -EOPNOTSUPP)
2636                         return rc;
2637                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2638                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2639                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2640                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2641                 return -EREMOTE;
2642         } else if (exit_reason != -EFAULT) {
2643                 vcpu->stat.exit_null++;
2644                 return 0;
2645         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2646                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2647                 vcpu->run->s390_ucontrol.trans_exc_code =
2648                                                 current->thread.gmap_addr;
2649                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2650                 return -EREMOTE;
2651         } else if (current->thread.gmap_pfault) {
2652                 trace_kvm_s390_major_guest_pfault(vcpu);
2653                 current->thread.gmap_pfault = 0;
2654                 if (kvm_arch_setup_async_pf(vcpu))
2655                         return 0;
2656                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2657         }
2658         return vcpu_post_run_fault_in_sie(vcpu);
2659 }
2660
2661 static int __vcpu_run(struct kvm_vcpu *vcpu)
2662 {
2663         int rc, exit_reason;
2664
2665         /*
2666          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2667          * ning the guest), so that memslots (and other stuff) are protected
2668          */
2669         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2670
2671         do {
2672                 rc = vcpu_pre_run(vcpu);
2673                 if (rc)
2674                         break;
2675
2676                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2677                 /*
2678                  * As PF_VCPU will be used in fault handler, between
2679                  * guest_enter and guest_exit should be no uaccess.
2680                  */
2681                 local_irq_disable();
2682                 guest_enter_irqoff();
2683                 __disable_cpu_timer_accounting(vcpu);
2684                 local_irq_enable();
2685                 exit_reason = sie64a(vcpu->arch.sie_block,
2686                                      vcpu->run->s.regs.gprs);
2687                 local_irq_disable();
2688                 __enable_cpu_timer_accounting(vcpu);
2689                 guest_exit_irqoff();
2690                 local_irq_enable();
2691                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2692
2693                 rc = vcpu_post_run(vcpu, exit_reason);
2694         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2695
2696         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2697         return rc;
2698 }
2699
2700 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2701 {
2702         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2703         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2704         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2705                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2706         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2707                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2708                 /* some control register changes require a tlb flush */
2709                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2710         }
2711         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2712                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2713                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2714                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2715                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2716                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2717         }
2718         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2719                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2720                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2721                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2722                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2723                         kvm_clear_async_pf_completion_queue(vcpu);
2724         }
2725         /*
2726          * If userspace sets the riccb (e.g. after migration) to a valid state,
2727          * we should enable RI here instead of doing the lazy enablement.
2728          */
2729         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2730             test_kvm_facility(vcpu->kvm, 64)) {
2731                 struct runtime_instr_cb *riccb =
2732                         (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2733
2734                 if (riccb->valid)
2735                         vcpu->arch.sie_block->ecb3 |= 0x01;
2736         }
2737         save_access_regs(vcpu->arch.host_acrs);
2738         restore_access_regs(vcpu->run->s.regs.acrs);
2739
2740         kvm_run->kvm_dirty_regs = 0;
2741 }
2742
2743 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2744 {
2745         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2746         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2747         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2748         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2749         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2750         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2751         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2752         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2753         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2754         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2755         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2756         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2757         save_access_regs(vcpu->run->s.regs.acrs);
2758         restore_access_regs(vcpu->arch.host_acrs);
2759 }
2760
2761 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2762 {
2763         int rc;
2764         sigset_t sigsaved;
2765
2766         if (guestdbg_exit_pending(vcpu)) {
2767                 kvm_s390_prepare_debug_exit(vcpu);
2768                 return 0;
2769         }
2770
2771         if (vcpu->sigset_active)
2772                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2773
2774         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2775                 kvm_s390_vcpu_start(vcpu);
2776         } else if (is_vcpu_stopped(vcpu)) {
2777                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2778                                    vcpu->vcpu_id);
2779                 return -EINVAL;
2780         }
2781
2782         sync_regs(vcpu, kvm_run);
2783         enable_cpu_timer_accounting(vcpu);
2784
2785         might_fault();
2786         rc = __vcpu_run(vcpu);
2787
2788         if (signal_pending(current) && !rc) {
2789                 kvm_run->exit_reason = KVM_EXIT_INTR;
2790                 rc = -EINTR;
2791         }
2792
2793         if (guestdbg_exit_pending(vcpu) && !rc)  {
2794                 kvm_s390_prepare_debug_exit(vcpu);
2795                 rc = 0;
2796         }
2797
2798         if (rc == -EREMOTE) {
2799                 /* userspace support is needed, kvm_run has been prepared */
2800                 rc = 0;
2801         }
2802
2803         disable_cpu_timer_accounting(vcpu);
2804         store_regs(vcpu, kvm_run);
2805
2806         if (vcpu->sigset_active)
2807                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2808
2809         vcpu->stat.exit_userspace++;
2810         return rc;
2811 }
2812
2813 /*
2814  * store status at address
2815  * we use have two special cases:
2816  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2817  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2818  */
2819 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2820 {
2821         unsigned char archmode = 1;
2822         freg_t fprs[NUM_FPRS];
2823         unsigned int px;
2824         u64 clkcomp, cputm;
2825         int rc;
2826
2827         px = kvm_s390_get_prefix(vcpu);
2828         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2829                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2830                         return -EFAULT;
2831                 gpa = 0;
2832         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2833                 if (write_guest_real(vcpu, 163, &archmode, 1))
2834                         return -EFAULT;
2835                 gpa = px;
2836         } else
2837                 gpa -= __LC_FPREGS_SAVE_AREA;
2838
2839         /* manually convert vector registers if necessary */
2840         if (MACHINE_HAS_VX) {
2841                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2842                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2843                                      fprs, 128);
2844         } else {
2845                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2846                                      vcpu->run->s.regs.fprs, 128);
2847         }
2848         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2849                               vcpu->run->s.regs.gprs, 128);
2850         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2851                               &vcpu->arch.sie_block->gpsw, 16);
2852         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2853                               &px, 4);
2854         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2855                               &vcpu->run->s.regs.fpc, 4);
2856         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2857                               &vcpu->arch.sie_block->todpr, 4);
2858         cputm = kvm_s390_get_cpu_timer(vcpu);
2859         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2860                               &cputm, 8);
2861         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2862         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2863                               &clkcomp, 8);
2864         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2865                               &vcpu->run->s.regs.acrs, 64);
2866         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2867                               &vcpu->arch.sie_block->gcr, 128);
2868         return rc ? -EFAULT : 0;
2869 }
2870
2871 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2872 {
2873         /*
2874          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2875          * switch in the run ioctl. Let's update our copies before we save
2876          * it into the save area
2877          */
2878         save_fpu_regs();
2879         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2880         save_access_regs(vcpu->run->s.regs.acrs);
2881
2882         return kvm_s390_store_status_unloaded(vcpu, addr);
2883 }
2884
2885 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2886 {
2887         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2888         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2889 }
2890
2891 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2892 {
2893         unsigned int i;
2894         struct kvm_vcpu *vcpu;
2895
2896         kvm_for_each_vcpu(i, vcpu, kvm) {
2897                 __disable_ibs_on_vcpu(vcpu);
2898         }
2899 }
2900
2901 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2902 {
2903         if (!sclp.has_ibs)
2904                 return;
2905         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2906         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2907 }
2908
2909 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2910 {
2911         int i, online_vcpus, started_vcpus = 0;
2912
2913         if (!is_vcpu_stopped(vcpu))
2914                 return;
2915
2916         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2917         /* Only one cpu at a time may enter/leave the STOPPED state. */
2918         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2919         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2920
2921         for (i = 0; i < online_vcpus; i++) {
2922                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2923                         started_vcpus++;
2924         }
2925
2926         if (started_vcpus == 0) {
2927                 /* we're the only active VCPU -> speed it up */
2928                 __enable_ibs_on_vcpu(vcpu);
2929         } else if (started_vcpus == 1) {
2930                 /*
2931                  * As we are starting a second VCPU, we have to disable
2932                  * the IBS facility on all VCPUs to remove potentially
2933                  * oustanding ENABLE requests.
2934                  */
2935                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2936         }
2937
2938         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2939         /*
2940          * Another VCPU might have used IBS while we were offline.
2941          * Let's play safe and flush the VCPU at startup.
2942          */
2943         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2944         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2945         return;
2946 }
2947
2948 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2949 {
2950         int i, online_vcpus, started_vcpus = 0;
2951         struct kvm_vcpu *started_vcpu = NULL;
2952
2953         if (is_vcpu_stopped(vcpu))
2954                 return;
2955
2956         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2957         /* Only one cpu at a time may enter/leave the STOPPED state. */
2958         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2959         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2960
2961         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2962         kvm_s390_clear_stop_irq(vcpu);
2963
2964         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2965         __disable_ibs_on_vcpu(vcpu);
2966
2967         for (i = 0; i < online_vcpus; i++) {
2968                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2969                         started_vcpus++;
2970                         started_vcpu = vcpu->kvm->vcpus[i];
2971                 }
2972         }
2973
2974         if (started_vcpus == 1) {
2975                 /*
2976                  * As we only have one VCPU left, we want to enable the
2977                  * IBS facility for that VCPU to speed it up.
2978                  */
2979                 __enable_ibs_on_vcpu(started_vcpu);
2980         }
2981
2982         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2983         return;
2984 }
2985
2986 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2987                                      struct kvm_enable_cap *cap)
2988 {
2989         int r;
2990
2991         if (cap->flags)
2992                 return -EINVAL;
2993
2994         switch (cap->cap) {
2995         case KVM_CAP_S390_CSS_SUPPORT:
2996                 if (!vcpu->kvm->arch.css_support) {
2997                         vcpu->kvm->arch.css_support = 1;
2998                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2999                         trace_kvm_s390_enable_css(vcpu->kvm);
3000                 }
3001                 r = 0;
3002                 break;
3003         default:
3004                 r = -EINVAL;
3005                 break;
3006         }
3007         return r;
3008 }
3009
3010 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3011                                   struct kvm_s390_mem_op *mop)
3012 {
3013         void __user *uaddr = (void __user *)mop->buf;
3014         void *tmpbuf = NULL;
3015         int r, srcu_idx;
3016         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3017                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3018
3019         if (mop->flags & ~supported_flags)
3020                 return -EINVAL;
3021
3022         if (mop->size > MEM_OP_MAX_SIZE)
3023                 return -E2BIG;
3024
3025         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3026                 tmpbuf = vmalloc(mop->size);
3027                 if (!tmpbuf)
3028                         return -ENOMEM;
3029         }
3030
3031         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3032
3033         switch (mop->op) {
3034         case KVM_S390_MEMOP_LOGICAL_READ:
3035                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3036                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3037                                             mop->size, GACC_FETCH);
3038                         break;
3039                 }
3040                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3041                 if (r == 0) {
3042                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3043                                 r = -EFAULT;
3044                 }
3045                 break;
3046         case KVM_S390_MEMOP_LOGICAL_WRITE:
3047                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3048                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3049                                             mop->size, GACC_STORE);
3050                         break;
3051                 }
3052                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3053                         r = -EFAULT;
3054                         break;
3055                 }
3056                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3057                 break;
3058         default:
3059                 r = -EINVAL;
3060         }
3061
3062         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3063
3064         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3065                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3066
3067         vfree(tmpbuf);
3068         return r;
3069 }
3070
3071 long kvm_arch_vcpu_ioctl(struct file *filp,
3072                          unsigned int ioctl, unsigned long arg)
3073 {
3074         struct kvm_vcpu *vcpu = filp->private_data;
3075         void __user *argp = (void __user *)arg;
3076         int idx;
3077         long r;
3078
3079         switch (ioctl) {
3080         case KVM_S390_IRQ: {
3081                 struct kvm_s390_irq s390irq;
3082
3083                 r = -EFAULT;
3084                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3085                         break;
3086                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3087                 break;
3088         }
3089         case KVM_S390_INTERRUPT: {
3090                 struct kvm_s390_interrupt s390int;
3091                 struct kvm_s390_irq s390irq;
3092
3093                 r = -EFAULT;
3094                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3095                         break;
3096                 if (s390int_to_s390irq(&s390int, &s390irq))
3097                         return -EINVAL;
3098                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3099                 break;
3100         }
3101         case KVM_S390_STORE_STATUS:
3102                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3103                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3104                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3105                 break;
3106         case KVM_S390_SET_INITIAL_PSW: {
3107                 psw_t psw;
3108
3109                 r = -EFAULT;
3110                 if (copy_from_user(&psw, argp, sizeof(psw)))
3111                         break;
3112                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3113                 break;
3114         }
3115         case KVM_S390_INITIAL_RESET:
3116                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3117                 break;
3118         case KVM_SET_ONE_REG:
3119         case KVM_GET_ONE_REG: {
3120                 struct kvm_one_reg reg;
3121                 r = -EFAULT;
3122                 if (copy_from_user(&reg, argp, sizeof(reg)))
3123                         break;
3124                 if (ioctl == KVM_SET_ONE_REG)
3125                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3126                 else
3127                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3128                 break;
3129         }
3130 #ifdef CONFIG_KVM_S390_UCONTROL
3131         case KVM_S390_UCAS_MAP: {
3132                 struct kvm_s390_ucas_mapping ucasmap;
3133
3134                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3135                         r = -EFAULT;
3136                         break;
3137                 }
3138
3139                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3140                         r = -EINVAL;
3141                         break;
3142                 }
3143
3144                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3145                                      ucasmap.vcpu_addr, ucasmap.length);
3146                 break;
3147         }
3148         case KVM_S390_UCAS_UNMAP: {
3149                 struct kvm_s390_ucas_mapping ucasmap;
3150
3151                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3152                         r = -EFAULT;
3153                         break;
3154                 }
3155
3156                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3157                         r = -EINVAL;
3158                         break;
3159                 }
3160
3161                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3162                         ucasmap.length);
3163                 break;
3164         }
3165 #endif
3166         case KVM_S390_VCPU_FAULT: {
3167                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3168                 break;
3169         }
3170         case KVM_ENABLE_CAP:
3171         {
3172                 struct kvm_enable_cap cap;
3173                 r = -EFAULT;
3174                 if (copy_from_user(&cap, argp, sizeof(cap)))
3175                         break;
3176                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3177                 break;
3178         }
3179         case KVM_S390_MEM_OP: {
3180                 struct kvm_s390_mem_op mem_op;
3181
3182                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3183                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3184                 else
3185                         r = -EFAULT;
3186                 break;
3187         }
3188         case KVM_S390_SET_IRQ_STATE: {
3189                 struct kvm_s390_irq_state irq_state;
3190
3191                 r = -EFAULT;
3192                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3193                         break;
3194                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3195                     irq_state.len == 0 ||
3196                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3197                         r = -EINVAL;
3198                         break;
3199                 }
3200                 r = kvm_s390_set_irq_state(vcpu,
3201                                            (void __user *) irq_state.buf,
3202                                            irq_state.len);
3203                 break;
3204         }
3205         case KVM_S390_GET_IRQ_STATE: {
3206                 struct kvm_s390_irq_state irq_state;
3207
3208                 r = -EFAULT;
3209                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3210                         break;
3211                 if (irq_state.len == 0) {
3212                         r = -EINVAL;
3213                         break;
3214                 }
3215                 r = kvm_s390_get_irq_state(vcpu,
3216                                            (__u8 __user *)  irq_state.buf,
3217                                            irq_state.len);
3218                 break;
3219         }
3220         default:
3221                 r = -ENOTTY;
3222         }
3223         return r;
3224 }
3225
3226 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3227 {
3228 #ifdef CONFIG_KVM_S390_UCONTROL
3229         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3230                  && (kvm_is_ucontrol(vcpu->kvm))) {
3231                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3232                 get_page(vmf->page);
3233                 return 0;
3234         }
3235 #endif
3236         return VM_FAULT_SIGBUS;
3237 }
3238
/*
 * Architecture hook called when a memslot is created.
 * Nothing to do here; always reports success (0).
 */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
3244
3245 /* Section: memory related */
3246 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3247                                    struct kvm_memory_slot *memslot,
3248                                    const struct kvm_userspace_memory_region *mem,
3249                                    enum kvm_mr_change change)
3250 {
3251         /* A few sanity checks. We can have memory slots which have to be
3252            located/ended at a segment boundary (1MB). The memory in userland is
3253            ok to be fragmented into various different vmas. It is okay to mmap()
3254            and munmap() stuff in this slot after doing this call at any time */
3255
3256         if (mem->userspace_addr & 0xffffful)
3257                 return -EINVAL;
3258
3259         if (mem->memory_size & 0xffffful)
3260                 return -EINVAL;
3261
3262         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3263                 return -EINVAL;
3264
3265         return 0;
3266 }
3267
3268 void kvm_arch_commit_memory_region(struct kvm *kvm,
3269                                 const struct kvm_userspace_memory_region *mem,
3270                                 const struct kvm_memory_slot *old,
3271                                 const struct kvm_memory_slot *new,
3272                                 enum kvm_mr_change change)
3273 {
3274         int rc;
3275
3276         /* If the basics of the memslot do not change, we do not want
3277          * to update the gmap. Every update causes several unnecessary
3278          * segment translation exceptions. This is usually handled just
3279          * fine by the normal fault handler + gmap, but it will also
3280          * cause faults on the prefix page of running guest CPUs.
3281          */
3282         if (old->userspace_addr == mem->userspace_addr &&
3283             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3284             old->npages * PAGE_SIZE == mem->memory_size)
3285                 return;
3286
3287         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3288                 mem->guest_phys_addr, mem->memory_size);
3289         if (rc)
3290                 pr_warn("failed to commit memory region\n");
3291         return;
3292 }
3293
3294 static inline unsigned long nonhyp_mask(int i)
3295 {
3296         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3297
3298         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3299 }
3300
3301 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3302 {
3303         vcpu->valid_wakeup = false;
3304 }
3305
3306 static int __init kvm_s390_init(void)
3307 {
3308         int i;
3309
3310         if (!sclp.has_sief2) {
3311                 pr_info("SIE not available\n");
3312                 return -ENODEV;
3313         }
3314
3315         for (i = 0; i < 16; i++)
3316                 kvm_s390_fac_list_mask[i] |=
3317                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3318
3319         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3320 }
3321
3322 static void __exit kvm_s390_exit(void)
3323 {
3324         kvm_exit();
3325 }
3326
3327 module_init(kvm_s390_init);
3328 module_exit(kvm_s390_exit);
3329
3330 /*
3331  * Enable autoloading of the kvm module.
3332  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3333  * since x86 takes a different approach.
3334  */
3335 #include <linux/miscdevice.h>
3336 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3337 MODULE_ALIAS("devname:kvm");