arch/s390/kvm/kvm-s390.c

   1 /*
   2  * hosting zSeries kernel virtual machines
   3  *
   4  * Copyright IBM Corp. 2008, 2009
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License (version 2 only)
   8  * as published by the Free Software Foundation.
   9  *
  10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  11  *               Christian Borntraeger <borntraeger@de.ibm.com>
  12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  14  *               Jason J. Herne <jjherne@us.ibm.com>
  15  */
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/mman.h>
  25 #include <linux/module.h>
  26 #include <linux/moduleparam.h>
  27 #include <linux/random.h>
  28 #include <linux/slab.h>
  29 #include <linux/timer.h>
  30 #include <linux/vmalloc.h>
  31 #include <linux/bitmap.h>
  32 #include <asm/asm-offsets.h>
  33 #include <asm/lowcore.h>
  34 #include <asm/stp.h>
  35 #include <asm/pgtable.h>
  36 #include <asm/gmap.h>
  37 #include <asm/nmi.h>
  38 #include <asm/switch_to.h>
  39 #include <asm/isc.h>
  40 #include <asm/sclp.h>
  41 #include <asm/cpacf.h>
  42 #include <asm/timex.h>
  43 #include "kvm-s390.h"
  44 #include "gaccess.h"
  45
  46 #define KMSG_COMPONENT "kvm-s390"
  47 #undef pr_fmt
  48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  49
  50 #define CREATE_TRACE_POINTS
  51 #include "trace.h"
  52 #include "trace-s390.h"
  53
  54 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
     /* NOTE(review): presumably the number of local interrupt slots per VCPU - confirm */
  55 #define LOCAL_IRQS 32
     /* Size of a buffer holding (KVM_MAX_VCPUS + LOCAL_IRQS) struct kvm_s390_irq entries */
  56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  57                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  58
     /*
      * Expands to the two initializers that follow the name in a
      * debugfs_entries row: the offset of stat.x within struct kvm_vcpu and
      * the KVM_STAT_VCPU kind.
      */
  59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  60
     /*
      * Maps statistic names to the per-VCPU counters in struct kvm_vcpu.stat.
      * The list is terminated by an entry whose name is NULL.
      */
  61 struct kvm_stats_debugfs_item debugfs_entries[] = {
  62         { "userspace_handled", VCPU_STAT(exit_userspace) },
  63         { "exit_null", VCPU_STAT(exit_null) },
  64         { "exit_validity", VCPU_STAT(exit_validity) },
  65         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  66         { "exit_external_request", VCPU_STAT(exit_external_request) },
  67         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  68         { "exit_instruction", VCPU_STAT(exit_instruction) },
  69         { "exit_pei", VCPU_STAT(exit_pei) },
  70         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  71         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  72         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
  73         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  74         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  75         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
  76         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  77         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  78         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  79         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  80         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  81         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  82         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  83         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  84         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
  85         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  86         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  87         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  88         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
  89         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  90         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
  91         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
  92         { "instruction_spx", VCPU_STAT(instruction_spx) },
  93         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
  94         { "instruction_stap", VCPU_STAT(instruction_stap) },
  95         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
  96         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
  97         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
  98         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
  99         { "instruction_essa", VCPU_STAT(instruction_essa) },
 100         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
 101         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
 102         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
 103         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
 104         { "instruction_sie", VCPU_STAT(instruction_sie) },
 105         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
 106         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 107         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 108         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
 109         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
 110         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 111         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 112         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 113         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 114         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 115         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 116         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 117         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 118         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 119         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 120         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 121         { "diagnose_10", VCPU_STAT(diagnose_10) },
 122         { "diagnose_44", VCPU_STAT(diagnose_44) },
 123         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
 124         { "diagnose_258", VCPU_STAT(diagnose_258) },
 125         { "diagnose_308", VCPU_STAT(diagnose_308) },
 126         { "diagnose_500", VCPU_STAT(diagnose_500) },
 127         { NULL }
 128 };
 129
 130 /* allow nested virtualization in KVM (if enabled by user space) */
     /*
      * Integer module parameter, exposed read-only (S_IRUGO). While it is
      * zero, kvm_s390_cpu_feat_init() returns before allowing SIEF2 and the
      * features listed after it.
      */
 131 static int nested;
 132 module_param(nested, int, S_IRUGO);
 133 MODULE_PARM_DESC(nested, "Nested virtualization support");
 134
 135 /* upper facilities limit for kvm */
     /* 16 unsigned longs; leading words come from FACILITIES_KVM, the rest are zero */
 136 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
 137
 138 unsigned long kvm_s390_fac_list_mask_size(void)
 139 {
 140         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
 141         return ARRAY_SIZE(kvm_s390_fac_list_mask);
 142 }
 143
 144 /* cpu features KVM can offer; bits are set through allow_cpu_feat() */
 145 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 146 /* available subfunctions indicated via query / "test bit"; filled by kvm_s390_cpu_feat_init() */
 147 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 148
     /* pte notifiers, wired up and registered in kvm_arch_hardware_setup() */
 149 static struct gmap_notifier gmap_notifier;
 150 static struct gmap_notifier vsie_gmap_notifier;
     /* debug feature handle: registered in kvm_arch_init(), released in kvm_arch_exit() */
 151 debug_info_t *kvm_s390_dbf;
 152
 153 /* Section: not file related */
 154 int kvm_arch_hardware_enable(void)
 155 {
 156         /* every s390 is virtualization enabled ;-) */
 157         return 0;
 158 }
 159
 160 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 161                               unsigned long end);
 162
 163 /*
 164  * This callback is executed during stop_machine(). All CPUs are therefore
 165  * temporarily stopped. In order not to change guest behavior, we have to
 166  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 167  * so a CPU won't be stopped while calculating with the epoch.
 168  */
 169 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 170                           void *v)
 171 {
 172         struct kvm *kvm;
 173         struct kvm_vcpu *vcpu;
 174         int i;
 175         unsigned long long *delta = v;
 176
 177         list_for_each_entry(kvm, &vm_list, vm_list) {
 178                 kvm->arch.epoch -= *delta;
 179                 kvm_for_each_vcpu(i, vcpu, kvm) {
 180                         vcpu->arch.sie_block->epoch -= *delta;
 181                         if (vcpu->arch.cputm_enabled)
 182                                 vcpu->arch.cputm_start += *delta;
 183                         if (vcpu->arch.vsie_block)
 184                                 vcpu->arch.vsie_block->epoch -= *delta;
 185                 }
 186         }
 187         return NOTIFY_OK;
 188 }
 189
 190 static struct notifier_block kvm_clock_notifier = {
 191         .notifier_call = kvm_clock_sync,
 192 };
 193
 194 int kvm_arch_hardware_setup(void)
 195 {
 196         gmap_notifier.notifier_call = kvm_gmap_notifier;
 197         gmap_register_pte_notifier(&gmap_notifier);
 198         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 199         gmap_register_pte_notifier(&vsie_gmap_notifier);
 200         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 201                                        &kvm_clock_notifier);
 202         return 0;
 203 }
 204
 205 void kvm_arch_hardware_unsetup(void)
 206 {
 207         gmap_unregister_pte_notifier(&gmap_notifier);
 208         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 209         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 210                                          &kvm_clock_notifier);
 211 }
 212
 213 static void allow_cpu_feat(unsigned long nr)
 214 {
 215         set_bit_inv(nr, kvm_s390_available_cpu_feat);
 216 }
 217
     /*
      * Probe whether PLO function code @nr is available.
      *
      * @nr with 0x100 or'ed in is placed in general register 0 before the
      * PLO instruction is issued; the resulting condition code is then
      * copied into @cc.
      *
      * Returns 1 if the condition code is 0, otherwise 0.
      */
 218 static inline int plo_test_bit(unsigned char nr)
 219 {
         /* pinned to register 0 through the asm("0") register variable */
 220         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
 221         int cc = 3; /* subfunction not available */
 222
 223         asm volatile(
 224                 /* Parameter registers are ignored for "test bit" */
 225                 "       plo     0,0,0,0(0)\n"
                     /* fetch the condition code and shift it down to the low bits */
 226                 "       ipm     %0\n"
 227                 "       srl     %0,28\n"
 228                 : "=d" (cc)
 229                 : "d" (r0)
 230                 : "cc");
 231         return cc == 0;
 232 }
 233
 234 static void kvm_s390_cpu_feat_init(void)
 235 {
 236         int i;
 237
 238         for (i = 0; i < 256; ++i) {
 239                 if (plo_test_bit(i))
 240                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 241         }
 242
 243         if (test_facility(28)) /* TOD-clock steering */
 244                 ptff(kvm_s390_available_subfunc.ptff,
 245                      sizeof(kvm_s390_available_subfunc.ptff),
 246                      PTFF_QAF);
 247
 248         if (test_facility(17)) { /* MSA */
 249                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 250                               kvm_s390_available_subfunc.kmac);
 251                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 252                               kvm_s390_available_subfunc.kmc);
 253                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 254                               kvm_s390_available_subfunc.km);
 255                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 256                               kvm_s390_available_subfunc.kimd);
 257                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 258                               kvm_s390_available_subfunc.klmd);
 259         }
 260         if (test_facility(76)) /* MSA3 */
 261                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 262                               kvm_s390_available_subfunc.pckmo);
 263         if (test_facility(77)) { /* MSA4 */
 264                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 265                               kvm_s390_available_subfunc.kmctr);
 266                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 267                               kvm_s390_available_subfunc.kmf);
 268                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 269                               kvm_s390_available_subfunc.kmo);
 270                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 271                               kvm_s390_available_subfunc.pcc);
 272         }
 273         if (test_facility(57)) /* MSA5 */
 274                 __cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
 275                               kvm_s390_available_subfunc.ppno);
 276
 277         if (MACHINE_HAS_ESOP)
 278                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 279         /*
 280          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 281          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 282          */
 283         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 284             !test_facility(3) || !nested)
 285                 return;
 286         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 287         if (sclp.has_64bscao)
 288                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 289         if (sclp.has_siif)
 290                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 291         if (sclp.has_gpere)
 292                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 293         if (sclp.has_gsls)
 294                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 295         if (sclp.has_ib)
 296                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 297         if (sclp.has_cei)
 298                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 299         if (sclp.has_ibs)
 300                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 301         /*
 302          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 303          * all skey handling functions read/set the skey from the PGSTE
 304          * instead of the real storage key.
 305          *
 306          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 307          * pages being detected as preserved although they are resident.
 308          *
 309          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 310          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 311          *
 312          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 313          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 314          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 315          *
 316          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 317          * cannot easily shadow the SCA because of the ipte lock.
 318          */
 319 }
 320
 321 int kvm_arch_init(void *opaque)
 322 {
 323         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 324         if (!kvm_s390_dbf)
 325                 return -ENOMEM;
 326
 327         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 328                 debug_unregister(kvm_s390_dbf);
 329                 return -ENOMEM;
 330         }
 331
 332         kvm_s390_cpu_feat_init();
 333
 334         /* Register floating interrupt controller interface. */
 335         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 336 }
 337
 338 void kvm_arch_exit(void)
 339 {
 340         debug_unregister(kvm_s390_dbf);
 341 }
 342
 343 /* Section: device related */
 344 long kvm_arch_dev_ioctl(struct file *filp,
 345                         unsigned int ioctl, unsigned long arg)
 346 {
 347         if (ioctl == KVM_S390_ENABLE_SIE)
 348                 return s390_enable_sie();
 349         return -EINVAL;
 350 }
 351
 352 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 353 {
 354         int r;
 355
 356         switch (ext) {
 357         case KVM_CAP_S390_PSW:
 358         case KVM_CAP_S390_GMAP:
 359         case KVM_CAP_SYNC_MMU:
 360 #ifdef CONFIG_KVM_S390_UCONTROL
 361         case KVM_CAP_S390_UCONTROL:
 362 #endif
 363         case KVM_CAP_ASYNC_PF:
 364         case KVM_CAP_SYNC_REGS:
 365         case KVM_CAP_ONE_REG:
 366         case KVM_CAP_ENABLE_CAP:
 367         case KVM_CAP_S390_CSS_SUPPORT:
 368         case KVM_CAP_IOEVENTFD:
 369         case KVM_CAP_DEVICE_CTRL:
 370         case KVM_CAP_ENABLE_CAP_VM:
 371         case KVM_CAP_S390_IRQCHIP:
 372         case KVM_CAP_VM_ATTRIBUTES:
 373         case KVM_CAP_MP_STATE:
 374         case KVM_CAP_S390_INJECT_IRQ:
 375         case KVM_CAP_S390_USER_SIGP:
 376         case KVM_CAP_S390_USER_STSI:
 377         case KVM_CAP_S390_SKEYS:
 378         case KVM_CAP_S390_IRQ_STATE:
 379         case KVM_CAP_S390_USER_INSTR0:
 380                 r = 1;
 381                 break;
 382         case KVM_CAP_S390_MEM_OP:
 383                 r = MEM_OP_MAX_SIZE;
 384                 break;
 385         case KVM_CAP_NR_VCPUS:
 386         case KVM_CAP_MAX_VCPUS:
 387                 r = KVM_S390_BSCA_CPU_SLOTS;
 388                 if (!kvm_s390_use_sca_entries())
 389                         r = KVM_MAX_VCPUS;
 390                 else if (sclp.has_esca && sclp.has_64bscao)
 391                         r = KVM_S390_ESCA_CPU_SLOTS;
 392                 break;
 393         case KVM_CAP_NR_MEMSLOTS:
 394                 r = KVM_USER_MEM_SLOTS;
 395                 break;
 396         case KVM_CAP_S390_COW:
 397                 r = MACHINE_HAS_ESOP;
 398                 break;
 399         case KVM_CAP_S390_VECTOR_REGISTERS:
 400                 r = MACHINE_HAS_VX;
 401                 break;
 402         case KVM_CAP_S390_RI:
 403                 r = test_facility(64);
 404                 break;
 405         default:
 406                 r = 0;
 407         }
 408         return r;
 409 }
 410
 411 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 412                                         struct kvm_memory_slot *memslot)
 413 {
 414         gfn_t cur_gfn, last_gfn;
 415         unsigned long address;
 416         struct gmap *gmap = kvm->arch.gmap;
 417
 418         /* Loop over all guest pages */
 419         last_gfn = memslot->base_gfn + memslot->npages;
 420         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 421                 address = gfn_to_hva_memslot(memslot, cur_gfn);
 422
 423                 if (test_and_clear_guest_dirty(gmap->mm, address))
 424                         mark_page_dirty(kvm, cur_gfn);
 425                 if (fatal_signal_pending(current))
 426                         return;
 427                 cond_resched();
 428         }
 429 }
 430
 431 /* Section: vm related */
 432 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 433
 434 /*
 435  * Get (and clear) the dirty memory log for a memory slot.
 436  */
 437 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 438                                struct kvm_dirty_log *log)
 439 {
 440         int r;
 441         unsigned long n;
 442         struct kvm_memslots *slots;
 443         struct kvm_memory_slot *memslot;
 444         int is_dirty = 0;
 445
 446         mutex_lock(&kvm->slots_lock);
 447
 448         r = -EINVAL;
 449         if (log->slot >= KVM_USER_MEM_SLOTS)
 450                 goto out;
 451
 452         slots = kvm_memslots(kvm);
 453         memslot = id_to_memslot(slots, log->slot);
 454         r = -ENOENT;
 455         if (!memslot->dirty_bitmap)
 456                 goto out;
 457
 458         kvm_s390_sync_dirty_log(kvm, memslot);
 459         r = kvm_get_dirty_log(kvm, log, &is_dirty);
 460         if (r)
 461                 goto out;
 462
 463         /* Clear the dirty log */
 464         if (is_dirty) {
 465                 n = kvm_dirty_bitmap_bytes(memslot);
 466                 memset(memslot->dirty_bitmap, 0, n);
 467         }
 468         r = 0;
 469 out:
 470         mutex_unlock(&kvm->slots_lock);
 471         return r;
 472 }
 473
 474 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 475 {
 476         unsigned int i;
 477         struct kvm_vcpu *vcpu;
 478
 479         kvm_for_each_vcpu(i, vcpu, kvm) {
 480                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 481         }
 482 }
 483
 484 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 485 {
 486         int r;
 487
 488         if (cap->flags)
 489                 return -EINVAL;
 490
 491         switch (cap->cap) {
 492         case KVM_CAP_S390_IRQCHIP:
 493                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 494                 kvm->arch.use_irqchip = 1;
 495                 r = 0;
 496                 break;
 497         case KVM_CAP_S390_USER_SIGP:
 498                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 499                 kvm->arch.user_sigp = 1;
 500                 r = 0;
 501                 break;
 502         case KVM_CAP_S390_VECTOR_REGISTERS:
 503                 mutex_lock(&kvm->lock);
 504                 if (kvm->created_vcpus) {
 505                         r = -EBUSY;
 506                 } else if (MACHINE_HAS_VX) {
 507                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 508                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 509                         r = 0;
 510                 } else
 511                         r = -EINVAL;
 512                 mutex_unlock(&kvm->lock);
 513                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 514                          r ? "(not available)" : "(success)");
 515                 break;
 516         case KVM_CAP_S390_RI:
 517                 r = -EINVAL;
 518                 mutex_lock(&kvm->lock);
 519                 if (kvm->created_vcpus) {
 520                         r = -EBUSY;
 521                 } else if (test_facility(64)) {
 522                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 523                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 524                         r = 0;
 525                 }
 526                 mutex_unlock(&kvm->lock);
 527                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 528                          r ? "(not available)" : "(success)");
 529                 break;
 530         case KVM_CAP_S390_USER_STSI:
 531                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 532                 kvm->arch.user_stsi = 1;
 533                 r = 0;
 534                 break;
 535         case KVM_CAP_S390_USER_INSTR0:
 536                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 537                 kvm->arch.user_instr0 = 1;
 538                 icpt_operexc_on_all_vcpus(kvm);
 539                 r = 0;
 540                 break;
 541         default:
 542                 r = -EINVAL;
 543                 break;
 544         }
 545         return r;
 546 }
 547
 548 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 549 {
 550         int ret;
 551
 552         switch (attr->attr) {
 553         case KVM_S390_VM_MEM_LIMIT_SIZE:
 554                 ret = 0;
 555                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 556                          kvm->arch.mem_limit);
 557                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 558                         ret = -EFAULT;
 559                 break;
 560         default:
 561                 ret = -ENXIO;
 562                 break;
 563         }
 564         return ret;
 565 }
 566
 567 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 568 {
 569         int ret;
 570         unsigned int idx;
 571         switch (attr->attr) {
 572         case KVM_S390_VM_MEM_ENABLE_CMMA:
 573                 ret = -ENXIO;
 574                 if (!sclp.has_cmma)
 575                         break;
 576
 577                 ret = -EBUSY;
 578                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 579                 mutex_lock(&kvm->lock);
 580                 if (!kvm->created_vcpus) {
 581                         kvm->arch.use_cmma = 1;
 582                         ret = 0;
 583                 }
 584                 mutex_unlock(&kvm->lock);
 585                 break;
 586         case KVM_S390_VM_MEM_CLR_CMMA:
 587                 ret = -ENXIO;
 588                 if (!sclp.has_cmma)
 589                         break;
 590                 ret = -EINVAL;
 591                 if (!kvm->arch.use_cmma)
 592                         break;
 593
 594                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 595                 mutex_lock(&kvm->lock);
 596                 idx = srcu_read_lock(&kvm->srcu);
 597                 s390_reset_cmma(kvm->arch.gmap->mm);
 598                 srcu_read_unlock(&kvm->srcu, idx);
 599                 mutex_unlock(&kvm->lock);
 600                 ret = 0;
 601                 break;
 602         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 603                 unsigned long new_limit;
 604
 605                 if (kvm_is_ucontrol(kvm))
 606                         return -EINVAL;
 607
 608                 if (get_user(new_limit, (u64 __user *)attr->addr))
 609                         return -EFAULT;
 610
 611                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 612                     new_limit > kvm->arch.mem_limit)
 613                         return -E2BIG;
 614
 615                 if (!new_limit)
 616                         return -EINVAL;
 617
 618                 /* gmap_create takes last usable address */
 619                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 620                         new_limit -= 1;
 621
 622                 ret = -EBUSY;
 623                 mutex_lock(&kvm->lock);
 624                 if (!kvm->created_vcpus) {
 625                         /* gmap_create will round the limit up */
 626                         struct gmap *new = gmap_create(current->mm, new_limit);
 627
 628                         if (!new) {
 629                                 ret = -ENOMEM;
 630                         } else {
 631                                 gmap_remove(kvm->arch.gmap);
 632                                 new->private = kvm;
 633                                 kvm->arch.gmap = new;
 634                                 ret = 0;
 635                         }
 636                 }
 637                 mutex_unlock(&kvm->lock);
 638                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 639                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 640                          (void *) kvm->arch.gmap->asce);
 641                 break;
 642         }
 643         default:
 644                 ret = -ENXIO;
 645                 break;
 646         }
 647         return ret;
 648 }
 649
 650 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 651
 652 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 653 {
 654         struct kvm_vcpu *vcpu;
 655         int i;
 656
 657         if (!test_kvm_facility(kvm, 76))
 658                 return -EINVAL;
 659
 660         mutex_lock(&kvm->lock);
 661         switch (attr->attr) {
 662         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 663                 get_random_bytes(
 664                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 665                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 666                 kvm->arch.crypto.aes_kw = 1;
 667                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 668                 break;
 669         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 670                 get_random_bytes(
 671                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 672                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 673                 kvm->arch.crypto.dea_kw = 1;
 674                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 675                 break;
 676         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 677                 kvm->arch.crypto.aes_kw = 0;
 678                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 679                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 680                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 681                 break;
 682         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 683                 kvm->arch.crypto.dea_kw = 0;
 684                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 685                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 686                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 687                 break;
 688         default:
 689                 mutex_unlock(&kvm->lock);
 690                 return -ENXIO;
 691         }
 692
 693         kvm_for_each_vcpu(i, vcpu, kvm) {
 694                 kvm_s390_vcpu_crypto_setup(vcpu);
 695                 exit_sie(vcpu);
 696         }
 697         mutex_unlock(&kvm->lock);
 698         return 0;
 699 }
 700
 701 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 702 {
 703         u8 gtod_high;
 704
 705         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
 706                                            sizeof(gtod_high)))
 707                 return -EFAULT;
 708
 709         if (gtod_high != 0)
 710                 return -EINVAL;
 711         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
 712
 713         return 0;
 714 }
 715
 716 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 717 {
 718         u64 gtod;
 719
 720         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 721                 return -EFAULT;
 722
 723         kvm_s390_set_tod_clock(kvm, gtod);
 724         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
 725         return 0;
 726 }
 727
 728 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 729 {
 730         int ret;
 731
 732         if (attr->flags)
 733                 return -EINVAL;
 734
 735         switch (attr->attr) {
 736         case KVM_S390_VM_TOD_HIGH:
 737                 ret = kvm_s390_set_tod_high(kvm, attr);
 738                 break;
 739         case KVM_S390_VM_TOD_LOW:
 740                 ret = kvm_s390_set_tod_low(kvm, attr);
 741                 break;
 742         default:
 743                 ret = -ENXIO;
 744                 break;
 745         }
 746         return ret;
 747 }
 748
 749 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 750 {
 751         u8 gtod_high = 0;
 752
 753         if (copy_to_user((void __user *)attr->addr, &gtod_high,
 754                                          sizeof(gtod_high)))
 755                 return -EFAULT;
 756         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
 757
 758         return 0;
 759 }
 760
 761 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 762 {
 763         u64 gtod;
 764
 765         gtod = kvm_s390_get_tod_clock_fast(kvm);
 766         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 767                 return -EFAULT;
 768         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
 769
 770         return 0;
 771 }
 772
 773 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 774 {
 775         int ret;
 776
 777         if (attr->flags)
 778                 return -EINVAL;
 779
 780         switch (attr->attr) {
 781         case KVM_S390_VM_TOD_HIGH:
 782                 ret = kvm_s390_get_tod_high(kvm, attr);
 783                 break;
 784         case KVM_S390_VM_TOD_LOW:
 785                 ret = kvm_s390_get_tod_low(kvm, attr);
 786                 break;
 787         default:
 788                 ret = -ENXIO;
 789                 break;
 790         }
 791         return ret;
 792 }
 793
 794 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 795 {
 796         struct kvm_s390_vm_cpu_processor *proc;
 797         u16 lowest_ibc, unblocked_ibc;
 798         int ret = 0;
 799
 800         mutex_lock(&kvm->lock);
 801         if (kvm->created_vcpus) {
 802                 ret = -EBUSY;
 803                 goto out;
 804         }
 805         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 806         if (!proc) {
 807                 ret = -ENOMEM;
 808                 goto out;
 809         }
 810         if (!copy_from_user(proc, (void __user *)attr->addr,
 811                             sizeof(*proc))) {
 812                 kvm->arch.model.cpuid = proc->cpuid;
 813                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
 814                 unblocked_ibc = sclp.ibc & 0xfff;
 815                 if (lowest_ibc && proc->ibc) {
 816                         if (proc->ibc > unblocked_ibc)
 817                                 kvm->arch.model.ibc = unblocked_ibc;
 818                         else if (proc->ibc < lowest_ibc)
 819                                 kvm->arch.model.ibc = lowest_ibc;
 820                         else
 821                                 kvm->arch.model.ibc = proc->ibc;
 822                 }
 823                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
 824                        S390_ARCH_FAC_LIST_SIZE_BYTE);
 825         } else
 826                 ret = -EFAULT;
 827         kfree(proc);
 828 out:
 829         mutex_unlock(&kvm->lock);
 830         return ret;
 831 }
 832
 833 static int kvm_s390_set_processor_feat(struct kvm *kvm,
 834                                        struct kvm_device_attr *attr)
 835 {
 836         struct kvm_s390_vm_cpu_feat data;
 837         int ret = -EBUSY;
 838
 839         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
 840                 return -EFAULT;
 841         if (!bitmap_subset((unsigned long *) data.feat,
 842                            kvm_s390_available_cpu_feat,
 843                            KVM_S390_VM_CPU_FEAT_NR_BITS))
 844                 return -EINVAL;
 845
 846         mutex_lock(&kvm->lock);
 847         if (!atomic_read(&kvm->online_vcpus)) {
 848                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
 849                             KVM_S390_VM_CPU_FEAT_NR_BITS);
 850                 ret = 0;
 851         }
 852         mutex_unlock(&kvm->lock);
 853         return ret;
 854 }
 855
 856 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
 857                                           struct kvm_device_attr *attr)
 858 {
 859         /*
 860          * Once supported by kernel + hw, we have to store the subfunctions
 861          * in kvm->arch and remember that user space configured them.
 862          */
 863         return -ENXIO;
 864 }
 865
 866 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 867 {
 868         int ret = -ENXIO;
 869
 870         switch (attr->attr) {
 871         case KVM_S390_VM_CPU_PROCESSOR:
 872                 ret = kvm_s390_set_processor(kvm, attr);
 873                 break;
 874         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 875                 ret = kvm_s390_set_processor_feat(kvm, attr);
 876                 break;
 877         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 878                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
 879                 break;
 880         }
 881         return ret;
 882 }
 883
 884 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 885 {
 886         struct kvm_s390_vm_cpu_processor *proc;
 887         int ret = 0;
 888
 889         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 890         if (!proc) {
 891                 ret = -ENOMEM;
 892                 goto out;
 893         }
 894         proc->cpuid = kvm->arch.model.cpuid;
 895         proc->ibc = kvm->arch.model.ibc;
 896         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
 897                S390_ARCH_FAC_LIST_SIZE_BYTE);
 898         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
 899                 ret = -EFAULT;
 900         kfree(proc);
 901 out:
 902         return ret;
 903 }
 904
 905 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
 906 {
 907         struct kvm_s390_vm_cpu_machine *mach;
 908         int ret = 0;
 909
 910         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
 911         if (!mach) {
 912                 ret = -ENOMEM;
 913                 goto out;
 914         }
 915         get_cpu_id((struct cpuid *) &mach->cpuid);
 916         mach->ibc = sclp.ibc;
 917         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
 918                S390_ARCH_FAC_LIST_SIZE_BYTE);
 919         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
 920                sizeof(S390_lowcore.stfle_fac_list));
 921         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
 922                 ret = -EFAULT;
 923         kfree(mach);
 924 out:
 925         return ret;
 926 }
 927
 928 static int kvm_s390_get_processor_feat(struct kvm *kvm,
 929                                        struct kvm_device_attr *attr)
 930 {
 931         struct kvm_s390_vm_cpu_feat data;
 932
 933         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
 934                     KVM_S390_VM_CPU_FEAT_NR_BITS);
 935         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
 936                 return -EFAULT;
 937         return 0;
 938 }
 939
 940 static int kvm_s390_get_machine_feat(struct kvm *kvm,
 941                                      struct kvm_device_attr *attr)
 942 {
 943         struct kvm_s390_vm_cpu_feat data;
 944
 945         bitmap_copy((unsigned long *) data.feat,
 946                     kvm_s390_available_cpu_feat,
 947                     KVM_S390_VM_CPU_FEAT_NR_BITS);
 948         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
 949                 return -EFAULT;
 950         return 0;
 951 }
 952
 953 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
 954                                           struct kvm_device_attr *attr)
 955 {
 956         /*
 957          * Once we can actually configure subfunctions (kernel + hw support),
 958          * we have to check if they were already set by user space, if so copy
 959          * them from kvm->arch.
 960          */
 961         return -ENXIO;
 962 }
 963
 964 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
 965                                         struct kvm_device_attr *attr)
 966 {
 967         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
 968             sizeof(struct kvm_s390_vm_cpu_subfunc)))
 969                 return -EFAULT;
 970         return 0;
 971 }
 972 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 973 {
 974         int ret = -ENXIO;
 975
 976         switch (attr->attr) {
 977         case KVM_S390_VM_CPU_PROCESSOR:
 978                 ret = kvm_s390_get_processor(kvm, attr);
 979                 break;
 980         case KVM_S390_VM_CPU_MACHINE:
 981                 ret = kvm_s390_get_machine(kvm, attr);
 982                 break;
 983         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 984                 ret = kvm_s390_get_processor_feat(kvm, attr);
 985                 break;
 986         case KVM_S390_VM_CPU_MACHINE_FEAT:
 987                 ret = kvm_s390_get_machine_feat(kvm, attr);
 988                 break;
 989         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 990                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
 991                 break;
 992         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
 993                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
 994                 break;
 995         }
 996         return ret;
 997 }
 998
 999 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1000 {
1001         int ret;
1002
1003         switch (attr->group) {
1004         case KVM_S390_VM_MEM_CTRL:
1005                 ret = kvm_s390_set_mem_control(kvm, attr);
1006                 break;
1007         case KVM_S390_VM_TOD:
1008                 ret = kvm_s390_set_tod(kvm, attr);
1009                 break;
1010         case KVM_S390_VM_CPU_MODEL:
1011                 ret = kvm_s390_set_cpu_model(kvm, attr);
1012                 break;
1013         case KVM_S390_VM_CRYPTO:
1014                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1015                 break;
1016         default:
1017                 ret = -ENXIO;
1018                 break;
1019         }
1020
1021         return ret;
1022 }
1023
1024 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1025 {
1026         int ret;
1027
1028         switch (attr->group) {
1029         case KVM_S390_VM_MEM_CTRL:
1030                 ret = kvm_s390_get_mem_control(kvm, attr);
1031                 break;
1032         case KVM_S390_VM_TOD:
1033                 ret = kvm_s390_get_tod(kvm, attr);
1034                 break;
1035         case KVM_S390_VM_CPU_MODEL:
1036                 ret = kvm_s390_get_cpu_model(kvm, attr);
1037                 break;
1038         default:
1039                 ret = -ENXIO;
1040                 break;
1041         }
1042
1043         return ret;
1044 }
1045
1046 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1047 {
1048         int ret;
1049
1050         switch (attr->group) {
1051         case KVM_S390_VM_MEM_CTRL:
1052                 switch (attr->attr) {
1053                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1054                 case KVM_S390_VM_MEM_CLR_CMMA:
1055                         ret = sclp.has_cmma ? 0 : -ENXIO;
1056                         break;
1057                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1058                         ret = 0;
1059                         break;
1060                 default:
1061                         ret = -ENXIO;
1062                         break;
1063                 }
1064                 break;
1065         case KVM_S390_VM_TOD:
1066                 switch (attr->attr) {
1067                 case KVM_S390_VM_TOD_LOW:
1068                 case KVM_S390_VM_TOD_HIGH:
1069                         ret = 0;
1070                         break;
1071                 default:
1072                         ret = -ENXIO;
1073                         break;
1074                 }
1075                 break;
1076         case KVM_S390_VM_CPU_MODEL:
1077                 switch (attr->attr) {
1078                 case KVM_S390_VM_CPU_PROCESSOR:
1079                 case KVM_S390_VM_CPU_MACHINE:
1080                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1081                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1082                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1083                         ret = 0;
1084                         break;
1085                 /* configuring subfunctions is not supported yet */
1086                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1087                 default:
1088                         ret = -ENXIO;
1089                         break;
1090                 }
1091                 break;
1092         case KVM_S390_VM_CRYPTO:
1093                 switch (attr->attr) {
1094                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1095                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1096                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1097                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1098                         ret = 0;
1099                         break;
1100                 default:
1101                         ret = -ENXIO;
1102                         break;
1103                 }
1104                 break;
1105         default:
1106                 ret = -ENXIO;
1107                 break;
1108         }
1109
1110         return ret;
1111 }
1112
1113 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1114 {
1115         uint8_t *keys;
1116         uint64_t hva;
1117         int i, r = 0;
1118
1119         if (args->flags != 0)
1120                 return -EINVAL;
1121
1122         /* Is this guest using storage keys? */
1123         if (!mm_use_skey(current->mm))
1124                 return KVM_S390_GET_SKEYS_NONE;
1125
1126         /* Enforce sane limit on memory allocation */
1127         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1128                 return -EINVAL;
1129
1130         keys = kmalloc_array(args->count, sizeof(uint8_t),
1131                              GFP_KERNEL | __GFP_NOWARN);
1132         if (!keys)
1133                 keys = vmalloc(sizeof(uint8_t) * args->count);
1134         if (!keys)
1135                 return -ENOMEM;
1136
1137         down_read(&current->mm->mmap_sem);
1138         for (i = 0; i < args->count; i++) {
1139                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1140                 if (kvm_is_error_hva(hva)) {
1141                         r = -EFAULT;
1142                         break;
1143                 }
1144
1145                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1146                 if (r)
1147                         break;
1148         }
1149         up_read(&current->mm->mmap_sem);
1150
1151         if (!r) {
1152                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1153                                  sizeof(uint8_t) * args->count);
1154                 if (r)
1155                         r = -EFAULT;
1156         }
1157
1158         kvfree(keys);
1159         return r;
1160 }
1161
1162 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1163 {
1164         uint8_t *keys;
1165         uint64_t hva;
1166         int i, r = 0;
1167
1168         if (args->flags != 0)
1169                 return -EINVAL;
1170
1171         /* Enforce sane limit on memory allocation */
1172         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1173                 return -EINVAL;
1174
1175         keys = kmalloc_array(args->count, sizeof(uint8_t),
1176                              GFP_KERNEL | __GFP_NOWARN);
1177         if (!keys)
1178                 keys = vmalloc(sizeof(uint8_t) * args->count);
1179         if (!keys)
1180                 return -ENOMEM;
1181
1182         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1183                            sizeof(uint8_t) * args->count);
1184         if (r) {
1185                 r = -EFAULT;
1186                 goto out;
1187         }
1188
1189         /* Enable storage key handling for the guest */
1190         r = s390_enable_skey();
1191         if (r)
1192                 goto out;
1193
1194         down_read(&current->mm->mmap_sem);
1195         for (i = 0; i < args->count; i++) {
1196                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1197                 if (kvm_is_error_hva(hva)) {
1198                         r = -EFAULT;
1199                         break;
1200                 }
1201
1202                 /* Lowest order bit is reserved */
1203                 if (keys[i] & 0x01) {
1204                         r = -EINVAL;
1205                         break;
1206                 }
1207
1208                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1209                 if (r)
1210                         break;
1211         }
1212         up_read(&current->mm->mmap_sem);
1213 out:
1214         kvfree(keys);
1215         return r;
1216 }
1217
1218 long kvm_arch_vm_ioctl(struct file *filp,
1219                        unsigned int ioctl, unsigned long arg)
1220 {
1221         struct kvm *kvm = filp->private_data;
1222         void __user *argp = (void __user *)arg;
1223         struct kvm_device_attr attr;
1224         int r;
1225
1226         switch (ioctl) {
1227         case KVM_S390_INTERRUPT: {
1228                 struct kvm_s390_interrupt s390int;
1229
1230                 r = -EFAULT;
1231                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1232                         break;
1233                 r = kvm_s390_inject_vm(kvm, &s390int);
1234                 break;
1235         }
1236         case KVM_ENABLE_CAP: {
1237                 struct kvm_enable_cap cap;
1238                 r = -EFAULT;
1239                 if (copy_from_user(&cap, argp, sizeof(cap)))
1240                         break;
1241                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1242                 break;
1243         }
1244         case KVM_CREATE_IRQCHIP: {
1245                 struct kvm_irq_routing_entry routing;
1246
1247                 r = -EINVAL;
1248                 if (kvm->arch.use_irqchip) {
1249                         /* Set up dummy routing. */
1250                         memset(&routing, 0, sizeof(routing));
1251                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1252                 }
1253                 break;
1254         }
1255         case KVM_SET_DEVICE_ATTR: {
1256                 r = -EFAULT;
1257                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1258                         break;
1259                 r = kvm_s390_vm_set_attr(kvm, &attr);
1260                 break;
1261         }
1262         case KVM_GET_DEVICE_ATTR: {
1263                 r = -EFAULT;
1264                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1265                         break;
1266                 r = kvm_s390_vm_get_attr(kvm, &attr);
1267                 break;
1268         }
1269         case KVM_HAS_DEVICE_ATTR: {
1270                 r = -EFAULT;
1271                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1272                         break;
1273                 r = kvm_s390_vm_has_attr(kvm, &attr);
1274                 break;
1275         }
1276         case KVM_S390_GET_SKEYS: {
1277                 struct kvm_s390_skeys args;
1278
1279                 r = -EFAULT;
1280                 if (copy_from_user(&args, argp,
1281                                    sizeof(struct kvm_s390_skeys)))
1282                         break;
1283                 r = kvm_s390_get_skeys(kvm, &args);
1284                 break;
1285         }
1286         case KVM_S390_SET_SKEYS: {
1287                 struct kvm_s390_skeys args;
1288
1289                 r = -EFAULT;
1290                 if (copy_from_user(&args, argp,
1291                                    sizeof(struct kvm_s390_skeys)))
1292                         break;
1293                 r = kvm_s390_set_skeys(kvm, &args);
1294                 break;
1295         }
1296         default:
1297                 r = -ENOTTY;
1298         }
1299
1300         return r;
1301 }
1302
/*
 * Issue PQAP(QCI) and store the result in @config.
 *
 * @config: caller-provided buffer; the first 128 bytes are zeroed before
 *          the instruction is issued.
 *
 * Returns the condition code extracted after the instruction.
 *
 * NOTE(review): cc starts out as 0 and the exception table entry skips the
 * ipm/srl pair, so a faulting PQAP presumably reports 0 with an all-zero
 * @config - confirm against the EX_TABLE semantics.
 */
static int kvm_s390_query_ap_config(u8 *config)
{
        u32 fcn_code = 0x04000000UL;
        u32 cc = 0;

        memset(config, 0, 128);
        /* The function code goes into register 0, the buffer address into 2. */
        asm volatile(
                "lgr 0,%1\n"
                "lgr 2,%2\n"
                ".long 0xb2af0000\n"            /* PQAP(QCI) */
                "0: ipm %0\n"
                "srl %0,28\n"
                "1:\n"
                EX_TABLE(0b, 1b)
                : "+r" (cc)
                : "r" (fcn_code), "r" (config)
                : "cc", "0", "2", "memory"
        );

        return cc;
}
1324
1325 static int kvm_s390_apxa_installed(void)
1326 {
1327         u8 config[128];
1328         int cc;
1329
1330         if (test_facility(12)) {
1331                 cc = kvm_s390_query_ap_config(config);
1332
1333                 if (cc)
1334                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1335                 else
1336                         return config[0] & 0x40;
1337         }
1338
1339         return 0;
1340 }
1341
1342 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1343 {
1344         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1345
1346         if (kvm_s390_apxa_installed())
1347                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1348         else
1349                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1350 }
1351
1352 static u64 kvm_s390_get_initial_cpuid(void)
1353 {
1354         struct cpuid cpuid;
1355
1356         get_cpu_id(&cpuid);
1357         cpuid.version = 0xff;
1358         return *((u64 *) &cpuid);
1359 }
1360
1361 static void kvm_s390_crypto_init(struct kvm *kvm)
1362 {
1363         if (!test_kvm_facility(kvm, 76))
1364                 return;
1365
1366         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1367         kvm_s390_set_crycb_format(kvm);
1368
1369         /* Enable AES/DEA protected key functions by default */
1370         kvm->arch.crypto.aes_kw = 1;
1371         kvm->arch.crypto.dea_kw = 1;
1372         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1373                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1374         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1375                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1376 }
1377
1378 static void sca_dispose(struct kvm *kvm)
1379 {
1380         if (kvm->arch.use_esca)
1381                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1382         else
1383                 free_page((unsigned long)(kvm->arch.sca));
1384         kvm->arch.sca = NULL;
1385 }
1386
/*
 * Architecture-specific part of VM creation.
 *
 * @kvm:  the VM being created
 * @type: VM type requested by user space. With CONFIG_KVM_S390_UCONTROL
 *        only the KVM_VM_S390_UCONTROL bit is accepted (and it requires
 *        CAP_SYS_ADMIN); without it, only 0 is accepted.
 *
 * Allocates the basic SCA, the debug feature area and sie_page2, populates
 * the facility mask/list and the initial CPU model, initializes crypto and
 * the floating interrupt state and, for non-ucontrol VMs, creates the
 * guest address space (gmap).
 *
 * Returns 0 on success, -EINVAL for an unsupported/forbidden @type, the
 * error from s390_enable_sie(), or -ENOMEM if any later step fails. All
 * failures share the out_err path, which releases sie_page2, the debug
 * area and the SCA.
 */
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
        gfp_t alloc_flags = GFP_KERNEL;
        int i, rc;
        char debug_name[16];
        /* Shared by all VMs; only modified under kvm_lock below. */
        static unsigned long sca_offset;

        rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
        if (type & ~KVM_VM_S390_UCONTROL)
                goto out_err;
        if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
                goto out_err;
#else
        if (type)
                goto out_err;
#endif

        rc = s390_enable_sie();
        if (rc)
                goto out_err;

        /* Every failure from here on is reported as -ENOMEM. */
        rc = -ENOMEM;

        ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);

        kvm->arch.use_esca = 0; /* start with basic SCA */
        /* Without sclp.has_64bscao the SCA page is taken from GFP_DMA. */
        if (!sclp.has_64bscao)
                alloc_flags |= GFP_DMA;
        rwlock_init(&kvm->arch.sca_lock);
        kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
        if (!kvm->arch.sca)
                goto out_err;
        /*
         * Place the SCA at a per-VM offset inside its page: the offset
         * advances by 16 bytes for each new VM and wraps to 0 once the
         * block would no longer fit into the page.
         */
        spin_lock(&kvm_lock);
        sca_offset += 16;
        if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
                sca_offset = 0;
        kvm->arch.sca = (struct bsca_block *)
                        ((char *) kvm->arch.sca + sca_offset);
        spin_unlock(&kvm_lock);

        sprintf(debug_name, "kvm-%u", current->pid);

        kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
        if (!kvm->arch.dbf)
                goto out_err;

        kvm->arch.sie_page2 =
             (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
        if (!kvm->arch.sie_page2)
                goto out_err;

        /*
         * Populate the facility mask initially: start from the facility
         * list in the lowcore, keep only the bits also set in
         * kvm_s390_fac_list_mask and clear every word beyond that mask.
         */
        memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
               sizeof(S390_lowcore.stfle_fac_list));
        for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
                if (i < kvm_s390_fac_list_mask_size())
                        kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
                else
                        kvm->arch.model.fac_mask[i] = 0UL;
        }

        /*
         * Populate the facility list initially. The list itself lives in
         * sie_page2 and starts out as a copy of the mask.
         */
        kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
        memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
               S390_ARCH_FAC_LIST_SIZE_BYTE);

        /* Facility 74 is set in both mask and list unconditionally. */
        set_kvm_facility(kvm->arch.model.fac_mask, 74);
        set_kvm_facility(kvm->arch.model.fac_list, 74);

        kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
        kvm->arch.model.ibc = sclp.ibc & 0x0fff;

        kvm_s390_crypto_init(kvm);

        spin_lock_init(&kvm->arch.float_int.lock);
        for (i = 0; i < FIRQ_LIST_COUNT; i++)
                INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
        init_waitqueue_head(&kvm->arch.ipte_wq);
        mutex_init(&kvm->arch.ipte_mutex);

        debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
        VM_EVENT(kvm, 3, "vm created with type %lu", type);

        if (type & KVM_VM_S390_UCONTROL) {
                /* ucontrol VMs have no VM-wide gmap and no memory limit. */
                kvm->arch.gmap = NULL;
                kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
        } else {
                /*
                 * The memory limit is TASK_MAX_SIZE, further capped by
                 * sclp.hamax + 1 unless hamax is U64_MAX (where the +1
                 * would wrap around).
                 */
                if (sclp.hamax == U64_MAX)
                        kvm->arch.mem_limit = TASK_MAX_SIZE;
                else
                        kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
                                                    sclp.hamax + 1);
                kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
                if (!kvm->arch.gmap)
                        goto out_err;
                kvm->arch.gmap->private = kvm;
                kvm->arch.gmap->pfault_enabled = 0;
        }

        kvm->arch.css_support = 0;
        kvm->arch.use_irqchip = 0;
        kvm->arch.epoch = 0;

        spin_lock_init(&kvm->arch.start_stop_lock);
        kvm_s390_vsie_init(kvm);
        KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

        return 0;
out_err:
        /*
         * Shared cleanup: reached with any subset of the resources below
         * already allocated.
         * NOTE(review): relies on kvm->arch starting out zeroed and on the
         * release helpers tolerating NULL/0 - confirm.
         */
        free_page((unsigned long)kvm->arch.sie_page2);
        debug_unregister(kvm->arch.dbf);
        sca_dispose(kvm);
        KVM_EVENT(3, "creation of vm failed: %d", rc);
        return rc;
}
1503
/* Always returns false. */
bool kvm_arch_has_vcpu_debugfs(void)
{
        return false;
}
1508
/* Nothing to create for @vcpu; always returns 0. */
int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
        return 0;
}
1513
1514 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1515 {
1516         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1517         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1518         kvm_s390_clear_local_irqs(vcpu);
1519         kvm_clear_async_pf_completion_queue(vcpu);
1520         if (!kvm_is_ucontrol(vcpu->kvm))
1521                 sca_del_vcpu(vcpu);
1522
1523         if (kvm_is_ucontrol(vcpu->kvm))
1524                 gmap_remove(vcpu->arch.gmap);
1525
1526         if (vcpu->kvm->arch.use_cmma)
1527                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1528         free_page((unsigned long)(vcpu->arch.sie_block));
1529
1530         kvm_vcpu_uninit(vcpu);
1531         kmem_cache_free(kvm_vcpu_cache, vcpu);
1532 }
1533
1534 static void kvm_free_vcpus(struct kvm *kvm)
1535 {
1536         unsigned int i;
1537         struct kvm_vcpu *vcpu;
1538
1539         kvm_for_each_vcpu(i, vcpu, kvm)
1540                 kvm_arch_vcpu_destroy(vcpu);
1541
1542         mutex_lock(&kvm->lock);
1543         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1544                 kvm->vcpus[i] = NULL;
1545
1546         atomic_set(&kvm->online_vcpus, 0);
1547         mutex_unlock(&kvm->lock);
1548 }
1549
/*
 * Architecture-specific part of VM destruction: releases the VCPUs first,
 * then the resources set up by kvm_arch_init_vm() (SCA, debug area,
 * sie_page2, gmap) and finally adapters, floating interrupts and vSIE
 * state.
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
        kvm_free_vcpus(kvm);
        sca_dispose(kvm);
        debug_unregister(kvm->arch.dbf);
        free_page((unsigned long)kvm->arch.sie_page2);
        /* ucontrol VMs have no VM-wide gmap; theirs are per VCPU. */
        if (!kvm_is_ucontrol(kvm))
                gmap_remove(kvm->arch.gmap);
        kvm_s390_destroy_adapters(kvm);
        kvm_s390_clear_float_irqs(kvm);
        kvm_s390_vsie_destroy(kvm);
        KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
1563
1564 /* Section: vcpu related */
1565 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1566 {
1567         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1568         if (!vcpu->arch.gmap)
1569                 return -ENOMEM;
1570         vcpu->arch.gmap->private = vcpu->kvm;
1571
1572         return 0;
1573 }
1574
1575 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1576 {
1577         if (!kvm_s390_use_sca_entries())
1578                 return;
1579         read_lock(&vcpu->kvm->arch.sca_lock);
1580         if (vcpu->kvm->arch.use_esca) {
1581                 struct esca_block *sca = vcpu->kvm->arch.sca;
1582
1583                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1584                 sca->cpu[vcpu->vcpu_id].sda = 0;
1585         } else {
1586                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1587
1588                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1589                 sca->cpu[vcpu->vcpu_id].sda = 0;
1590         }
1591         read_unlock(&vcpu->kvm->arch.sca_lock);
1592 }
1593
1594 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1595 {
1596         if (!kvm_s390_use_sca_entries()) {
1597                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1598
1599                 /* we still need the basic sca for the ipte control */
1600                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1601                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1602         }
1603         read_lock(&vcpu->kvm->arch.sca_lock);
1604         if (vcpu->kvm->arch.use_esca) {
1605                 struct esca_block *sca = vcpu->kvm->arch.sca;
1606
1607                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1608                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1609                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1610                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1611                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1612         } else {
1613                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1614
1615                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1616                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1617                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1618                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1619         }
1620         read_unlock(&vcpu->kvm->arch.sca_lock);
1621 }
1622
1623 /* Basic SCA to Extended SCA data copy routines */
1624 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1625 {
1626         d->sda = s->sda;
1627         d->sigp_ctrl.c = s->sigp_ctrl.c;
1628         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1629 }
1630
1631 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1632 {
1633         int i;
1634
1635         d->ipte_control = s->ipte_control;
1636         d->mcn[0] = s->mcn;
1637         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1638                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1639 }
1640
1641 static int sca_switch_to_extended(struct kvm *kvm)
1642 {
1643         struct bsca_block *old_sca = kvm->arch.sca;
1644         struct esca_block *new_sca;
1645         struct kvm_vcpu *vcpu;
1646         unsigned int vcpu_idx;
1647         u32 scaol, scaoh;
1648
1649         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1650         if (!new_sca)
1651                 return -ENOMEM;
1652
1653         scaoh = (u32)((u64)(new_sca) >> 32);
1654         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1655
1656         kvm_s390_vcpu_block_all(kvm);
1657         write_lock(&kvm->arch.sca_lock);
1658
1659         sca_copy_b_to_e(new_sca, old_sca);
1660
1661         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1662                 vcpu->arch.sie_block->scaoh = scaoh;
1663                 vcpu->arch.sie_block->scaol = scaol;
1664                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1665         }
1666         kvm->arch.sca = new_sca;
1667         kvm->arch.use_esca = 1;
1668
1669         write_unlock(&kvm->arch.sca_lock);
1670         kvm_s390_vcpu_unblock_all(kvm);
1671
1672         free_page((unsigned long)old_sca);
1673
1674         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1675                  old_sca, kvm->arch.sca);
1676         return 0;
1677 }
1678
1679 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1680 {
1681         int rc;
1682
1683         if (!kvm_s390_use_sca_entries()) {
1684                 if (id < KVM_MAX_VCPUS)
1685                         return true;
1686                 return false;
1687         }
1688         if (id < KVM_S390_BSCA_CPU_SLOTS)
1689                 return true;
1690         if (!sclp.has_esca || !sclp.has_64bscao)
1691                 return false;
1692
1693         mutex_lock(&kvm->lock);
1694         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1695         mutex_unlock(&kvm->lock);
1696
1697         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1698 }
1699
1700 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1701 {
1702         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1703         kvm_clear_async_pf_completion_queue(vcpu);
1704         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1705                                     KVM_SYNC_GPRS |
1706                                     KVM_SYNC_ACRS |
1707                                     KVM_SYNC_CRS |
1708                                     KVM_SYNC_ARCH0 |
1709                                     KVM_SYNC_PFAULT;
1710         kvm_s390_set_prefix(vcpu, 0);
1711         if (test_kvm_facility(vcpu->kvm, 64))
1712                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1713         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1714          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1715          */
1716         if (MACHINE_HAS_VX)
1717                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1718         else
1719                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1720
1721         if (kvm_is_ucontrol(vcpu->kvm))
1722                 return __kvm_ucontrol_vcpu_init(vcpu);
1723
1724         return 0;
1725 }
1726
1727 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1728 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1729 {
1730         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1731         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1732         vcpu->arch.cputm_start = get_tod_clock_fast();
1733         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1734 }
1735
1736 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1737 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1738 {
1739         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1740         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1741         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1742         vcpu->arch.cputm_start = 0;
1743         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1744 }
1745
1746 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1747 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1748 {
1749         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1750         vcpu->arch.cputm_enabled = true;
1751         __start_cpu_timer_accounting(vcpu);
1752 }
1753
1754 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1755 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1756 {
1757         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1758         __stop_cpu_timer_accounting(vcpu);
1759         vcpu->arch.cputm_enabled = false;
1760 }
1761
/*
 * Preemption-safe wrapper around __enable_cpu_timer_accounting().
 */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        /* protect from TOD sync and vcpu_load/put */
        preempt_disable();

        __enable_cpu_timer_accounting(vcpu);

        preempt_enable();
}
1768
/*
 * Preemption-safe wrapper around __disable_cpu_timer_accounting().
 */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        /* protect from TOD sync and vcpu_load/put */
        preempt_disable();

        __disable_cpu_timer_accounting(vcpu);

        preempt_enable();
}
1775
1776 /* set the cpu timer - may only be called from the VCPU thread itself */
1777 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1778 {
1779         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1780         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1781         if (vcpu->arch.cputm_enabled)
1782                 vcpu->arch.cputm_start = get_tod_clock_fast();
1783         vcpu->arch.sie_block->cputm = cputm;
1784         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1785         preempt_enable();
1786 }
1787
1788 /* update and get the cpu timer - can also be called from other VCPU threads */
1789 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1790 {
1791         unsigned int seq;
1792         __u64 value;
1793
1794         if (unlikely(!vcpu->arch.cputm_enabled))
1795                 return vcpu->arch.sie_block->cputm;
1796
1797         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1798         do {
1799                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1800                 /*
1801                  * If the writer would ever execute a read in the critical
1802                  * section, e.g. in irq context, we have a deadlock.
1803                  */
1804                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1805                 value = vcpu->arch.sie_block->cputm;
1806                 /* if cputm_start is 0, accounting is being started/stopped */
1807                 if (likely(vcpu->arch.cputm_start))
1808                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1809         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1810         preempt_enable();
1811         return value;
1812 }
1813
1814 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1815 {
1816
1817         gmap_enable(vcpu->arch.enabled_gmap);
1818         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1819         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1820                 __start_cpu_timer_accounting(vcpu);
1821         vcpu->cpu = cpu;
1822 }
1823
1824 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1825 {
1826         vcpu->cpu = -1;
1827         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1828                 __stop_cpu_timer_accounting(vcpu);
1829         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1830         vcpu->arch.enabled_gmap = gmap_get_enabled();
1831         gmap_disable(vcpu->arch.enabled_gmap);
1832
1833 }
1834
1835 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1836 {
1837         /* this equals initial cpu reset in pop, but we don't switch to ESA */
1838         vcpu->arch.sie_block->gpsw.mask = 0UL;
1839         vcpu->arch.sie_block->gpsw.addr = 0UL;
1840         kvm_s390_set_prefix(vcpu, 0);
1841         kvm_s390_set_cpu_timer(vcpu, 0);
1842         vcpu->arch.sie_block->ckc       = 0UL;
1843         vcpu->arch.sie_block->todpr     = 0;
1844         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1845         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1846         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1847         /* make sure the new fpc will be lazily loaded */
1848         save_fpu_regs();
1849         current->thread.fpu.fpc = 0;
1850         vcpu->arch.sie_block->gbea = 1;
1851         vcpu->arch.sie_block->pp = 0;
1852         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1853         kvm_clear_async_pf_completion_queue(vcpu);
1854         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1855                 kvm_s390_vcpu_stop(vcpu);
1856         kvm_s390_clear_local_irqs(vcpu);
1857 }
1858
1859 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1860 {
1861         mutex_lock(&vcpu->kvm->lock);
1862         preempt_disable();
1863         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1864         preempt_enable();
1865         mutex_unlock(&vcpu->kvm->lock);
1866         if (!kvm_is_ucontrol(vcpu->kvm)) {
1867                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1868                 sca_add_vcpu(vcpu);
1869         }
1870         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1871                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1872         /* make vcpu_load load the right gmap on the first trigger */
1873         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1874 }
1875
1876 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1877 {
1878         if (!test_kvm_facility(vcpu->kvm, 76))
1879                 return;
1880
1881         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1882
1883         if (vcpu->kvm->arch.crypto.aes_kw)
1884                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1885         if (vcpu->kvm->arch.crypto.dea_kw)
1886                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1887
1888         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1889 }
1890
1891 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1892 {
1893         free_page(vcpu->arch.sie_block->cbrlo);
1894         vcpu->arch.sie_block->cbrlo = 0;
1895 }
1896
1897 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1898 {
1899         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1900         if (!vcpu->arch.sie_block->cbrlo)
1901                 return -ENOMEM;
1902
1903         vcpu->arch.sie_block->ecb2 |= 0x80;
1904         vcpu->arch.sie_block->ecb2 &= ~0x08;
1905         return 0;
1906 }
1907
1908 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1909 {
1910         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1911
1912         vcpu->arch.sie_block->ibc = model->ibc;
1913         if (test_kvm_facility(vcpu->kvm, 7))
1914                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1915 }
1916
1917 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1918 {
1919         int rc = 0;
1920
1921         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1922                                                     CPUSTAT_SM |
1923                                                     CPUSTAT_STOPPED);
1924
1925         if (test_kvm_facility(vcpu->kvm, 78))
1926                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1927         else if (test_kvm_facility(vcpu->kvm, 8))
1928                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1929
1930         kvm_s390_vcpu_setup_model(vcpu);
1931
1932         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1933         if (MACHINE_HAS_ESOP)
1934                 vcpu->arch.sie_block->ecb |= 0x02;
1935         if (test_kvm_facility(vcpu->kvm, 9))
1936                 vcpu->arch.sie_block->ecb |= 0x04;
1937         if (test_kvm_facility(vcpu->kvm, 73))
1938                 vcpu->arch.sie_block->ecb |= 0x10;
1939
1940         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1941                 vcpu->arch.sie_block->ecb2 |= 0x08;
1942         vcpu->arch.sie_block->eca = 0x1002000U;
1943         if (sclp.has_cei)
1944                 vcpu->arch.sie_block->eca |= 0x80000000U;
1945         if (sclp.has_ib)
1946                 vcpu->arch.sie_block->eca |= 0x40000000U;
1947         if (sclp.has_siif)
1948                 vcpu->arch.sie_block->eca |= 1;
1949         if (sclp.has_sigpif)
1950                 vcpu->arch.sie_block->eca |= 0x10000000U;
1951         if (test_kvm_facility(vcpu->kvm, 129)) {
1952                 vcpu->arch.sie_block->eca |= 0x00020000;
1953                 vcpu->arch.sie_block->ecd |= 0x20000000;
1954         }
1955         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1956         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1957
1958         if (vcpu->kvm->arch.use_cmma) {
1959                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1960                 if (rc)
1961                         return rc;
1962         }
1963         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1964         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1965
1966         kvm_s390_vcpu_crypto_setup(vcpu);
1967
1968         return rc;
1969 }
1970
1971 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1972                                       unsigned int id)
1973 {
1974         struct kvm_vcpu *vcpu;
1975         struct sie_page *sie_page;
1976         int rc = -EINVAL;
1977
1978         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1979                 goto out;
1980
1981         rc = -ENOMEM;
1982
1983         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1984         if (!vcpu)
1985                 goto out;
1986
1987         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1988         if (!sie_page)
1989                 goto out_free_cpu;
1990
1991         vcpu->arch.sie_block = &sie_page->sie_block;
1992         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1993
1994         /* the real guest size will always be smaller than msl */
1995         vcpu->arch.sie_block->mso = 0;
1996         vcpu->arch.sie_block->msl = sclp.hamax;
1997
1998         vcpu->arch.sie_block->icpua = id;
1999         spin_lock_init(&vcpu->arch.local_int.lock);
2000         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2001         vcpu->arch.local_int.wq = &vcpu->wq;
2002         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2003         seqcount_init(&vcpu->arch.cputm_seqcount);
2004
2005         rc = kvm_vcpu_init(vcpu, kvm, id);
2006         if (rc)
2007                 goto out_free_sie_block;
2008         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2009                  vcpu->arch.sie_block);
2010         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2011
2012         return vcpu;
2013 out_free_sie_block:
2014         free_page((unsigned long)(vcpu->arch.sie_block));
2015 out_free_cpu:
2016         kmem_cache_free(kvm_vcpu_cache, vcpu);
2017 out:
2018         return ERR_PTR(rc);
2019 }
2020
/*
 * Report whether @vcpu is runnable, which is the result of
 * kvm_s390_vcpu_has_irq() called with a second argument of 0.
 */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
        int runnable = kvm_s390_vcpu_has_irq(vcpu, 0);

        return runnable;
}
2025
2026 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2027 {
2028         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2029         exit_sie(vcpu);
2030 }
2031
2032 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2033 {
2034         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2035 }
2036
2037 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2038 {
2039         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2040         exit_sie(vcpu);
2041 }
2042
2043 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2044 {
2045         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2046 }
2047
2048 /*
2049  * Kick a guest cpu out of SIE and wait until SIE is not running.
2050  * If the CPU is not running (e.g. waiting as idle) the function will
2051  * return immediately. */
2052 void exit_sie(struct kvm_vcpu *vcpu)
2053 {
2054         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2055         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2056                 cpu_relax();
2057 }
2058
/*
 * Kick a guest cpu out of SIE to process a request synchronously:
 * the request @req is queued first, then the vcpu is forced out of SIE.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
        /* queue before kicking, so the request is visible on SIE exit */
        kvm_make_request(req, vcpu);

        kvm_s390_vcpu_request(vcpu);
}
2065
2066 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2067                               unsigned long end)
2068 {
2069         struct kvm *kvm = gmap->private;
2070         struct kvm_vcpu *vcpu;
2071         unsigned long prefix;
2072         int i;
2073
2074         if (gmap_is_shadow(gmap))
2075                 return;
2076         if (start >= 1UL << 31)
2077                 /* We are only interested in prefix pages */
2078                 return;
2079         kvm_for_each_vcpu(i, vcpu, kvm) {
2080                 /* match against both prefix pages */
2081                 prefix = kvm_s390_get_prefix(vcpu);
2082                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2083                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2084                                    start, end);
2085                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2086                 }
2087         }
2088 }
2089
/*
 * Not expected to run on s390: kvm common code refers to this, but never
 * calls it, so reaching it is treated as a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
        BUG();

        return 0;
}
2096
2097 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2098                                            struct kvm_one_reg *reg)
2099 {
2100         int r = -EINVAL;
2101
2102         switch (reg->id) {
2103         case KVM_REG_S390_TODPR:
2104                 r = put_user(vcpu->arch.sie_block->todpr,
2105                              (u32 __user *)reg->addr);
2106                 break;
2107         case KVM_REG_S390_EPOCHDIFF:
2108                 r = put_user(vcpu->arch.sie_block->epoch,
2109                              (u64 __user *)reg->addr);
2110                 break;
2111         case KVM_REG_S390_CPU_TIMER:
2112                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2113                              (u64 __user *)reg->addr);
2114                 break;
2115         case KVM_REG_S390_CLOCK_COMP:
2116                 r = put_user(vcpu->arch.sie_block->ckc,
2117                              (u64 __user *)reg->addr);
2118                 break;
2119         case KVM_REG_S390_PFTOKEN:
2120                 r = put_user(vcpu->arch.pfault_token,
2121                              (u64 __user *)reg->addr);
2122                 break;
2123         case KVM_REG_S390_PFCOMPARE:
2124                 r = put_user(vcpu->arch.pfault_compare,
2125                              (u64 __user *)reg->addr);
2126                 break;
2127         case KVM_REG_S390_PFSELECT:
2128                 r = put_user(vcpu->arch.pfault_select,
2129                              (u64 __user *)reg->addr);
2130                 break;
2131         case KVM_REG_S390_PP:
2132                 r = put_user(vcpu->arch.sie_block->pp,
2133                              (u64 __user *)reg->addr);
2134                 break;
2135         case KVM_REG_S390_GBEA:
2136                 r = put_user(vcpu->arch.sie_block->gbea,
2137                              (u64 __user *)reg->addr);
2138                 break;
2139         default:
2140                 break;
2141         }
2142
2143         return r;
2144 }
2145
2146 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2147                                            struct kvm_one_reg *reg)
2148 {
2149         int r = -EINVAL;
2150         __u64 val;
2151
2152         switch (reg->id) {
2153         case KVM_REG_S390_TODPR:
2154                 r = get_user(vcpu->arch.sie_block->todpr,
2155                              (u32 __user *)reg->addr);
2156                 break;
2157         case KVM_REG_S390_EPOCHDIFF:
2158                 r = get_user(vcpu->arch.sie_block->epoch,
2159                              (u64 __user *)reg->addr);
2160                 break;
2161         case KVM_REG_S390_CPU_TIMER:
2162                 r = get_user(val, (u64 __user *)reg->addr);
2163                 if (!r)
2164                         kvm_s390_set_cpu_timer(vcpu, val);
2165                 break;
2166         case KVM_REG_S390_CLOCK_COMP:
2167                 r = get_user(vcpu->arch.sie_block->ckc,
2168                              (u64 __user *)reg->addr);
2169                 break;
2170         case KVM_REG_S390_PFTOKEN:
2171                 r = get_user(vcpu->arch.pfault_token,
2172                              (u64 __user *)reg->addr);
2173                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2174                         kvm_clear_async_pf_completion_queue(vcpu);
2175                 break;
2176         case KVM_REG_S390_PFCOMPARE:
2177                 r = get_user(vcpu->arch.pfault_compare,
2178                              (u64 __user *)reg->addr);
2179                 break;
2180         case KVM_REG_S390_PFSELECT:
2181                 r = get_user(vcpu->arch.pfault_select,
2182                              (u64 __user *)reg->addr);
2183                 break;
2184         case KVM_REG_S390_PP:
2185                 r = get_user(vcpu->arch.sie_block->pp,
2186                              (u64 __user *)reg->addr);
2187                 break;
2188         case KVM_REG_S390_GBEA:
2189                 r = get_user(vcpu->arch.sie_block->gbea,
2190                              (u64 __user *)reg->addr);
2191                 break;
2192         default:
2193                 break;
2194         }
2195
2196         return r;
2197 }
2198
/*
 * ioctl wrapper around kvm_s390_vcpu_initial_reset().
 *
 * Return: always 0.
 */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
        kvm_s390_vcpu_initial_reset(vcpu);

        return 0;
}
2204
2205 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2206 {
2207         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2208         return 0;
2209 }
2210
2211 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2212 {
2213         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2214         return 0;
2215 }
2216
2217 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2218                                   struct kvm_sregs *sregs)
2219 {
2220         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2221         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2222         return 0;
2223 }
2224
2225 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2226                                   struct kvm_sregs *sregs)
2227 {
2228         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2229         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2230         return 0;
2231 }
2232
2233 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2234 {
2235         if (test_fp_ctl(fpu->fpc))
2236                 return -EINVAL;
2237         vcpu->run->s.regs.fpc = fpu->fpc;
2238         if (MACHINE_HAS_VX)
2239                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2240                                  (freg_t *) fpu->fprs);
2241         else
2242                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2243         return 0;
2244 }
2245
2246 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2247 {
2248         /* make sure we have the latest values */
2249         save_fpu_regs();
2250         if (MACHINE_HAS_VX)
2251                 convert_vx_to_fp((freg_t *) fpu->fprs,
2252                                  (__vector128 *) vcpu->run->s.regs.vrs);
2253         else
2254                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2255         fpu->fpc = vcpu->run->s.regs.fpc;
2256         return 0;
2257 }
2258
2259 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2260 {
2261         int rc = 0;
2262
2263         if (!is_vcpu_stopped(vcpu))
2264                 rc = -EBUSY;
2265         else {
2266                 vcpu->run->psw_mask = psw.mask;
2267                 vcpu->run->psw_addr = psw.addr;
2268         }
2269         return rc;
2270 }
2271
2272 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2273                                   struct kvm_translation *tr)
2274 {
2275         return -EINVAL; /* not implemented yet */
2276 }
2277
2278 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2279                               KVM_GUESTDBG_USE_HW_BP | \
2280                               KVM_GUESTDBG_ENABLE)
2281
2282 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2283                                         struct kvm_guest_debug *dbg)
2284 {
2285         int rc = 0;
2286
2287         vcpu->guest_debug = 0;
2288         kvm_s390_clear_bp_data(vcpu);
2289
2290         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2291                 return -EINVAL;
2292         if (!sclp.has_gpere)
2293                 return -EINVAL;
2294
2295         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2296                 vcpu->guest_debug = dbg->control;
2297                 /* enforce guest PER */
2298                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2299
2300                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2301                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2302         } else {
2303                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2304                 vcpu->arch.guestdbg.last_bp = 0;
2305         }
2306
2307         if (rc) {
2308                 vcpu->guest_debug = 0;
2309                 kvm_s390_clear_bp_data(vcpu);
2310                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2311         }
2312
2313         return rc;
2314 }
2315
2316 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2317                                     struct kvm_mp_state *mp_state)
2318 {
2319         /* CHECK_STOP and LOAD are not supported yet */
2320         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2321                                        KVM_MP_STATE_OPERATING;
2322 }
2323
2324 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2325                                     struct kvm_mp_state *mp_state)
2326 {
2327         int rc = 0;
2328
2329         /* user space knows about this interface - let it control the state */
2330         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2331
2332         switch (mp_state->mp_state) {
2333         case KVM_MP_STATE_STOPPED:
2334                 kvm_s390_vcpu_stop(vcpu);
2335                 break;
2336         case KVM_MP_STATE_OPERATING:
2337                 kvm_s390_vcpu_start(vcpu);
2338                 break;
2339         case KVM_MP_STATE_LOAD:
2340         case KVM_MP_STATE_CHECK_STOP:
2341                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2342         default:
2343                 rc = -ENXIO;
2344         }
2345
2346         return rc;
2347 }
2348
2349 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2350 {
2351         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2352 }
2353
2354 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2355 {
2356 retry:
2357         kvm_s390_vcpu_request_handled(vcpu);
2358         if (!vcpu->requests)
2359                 return 0;
2360         /*
2361          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2362          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2363          * This ensures that the ipte instruction for this request has
2364          * already finished. We might race against a second unmapper that
2365          * wants to set the blocking bit. Lets just retry the request loop.
2366          */
2367         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2368                 int rc;
2369                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2370                                           kvm_s390_get_prefix(vcpu),
2371                                           PAGE_SIZE * 2, PROT_WRITE);
2372                 if (rc) {
2373                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2374                         return rc;
2375                 }
2376                 goto retry;
2377         }
2378
2379         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2380                 vcpu->arch.sie_block->ihcpu = 0xffff;
2381                 goto retry;
2382         }
2383
2384         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2385                 if (!ibs_enabled(vcpu)) {
2386                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2387                         atomic_or(CPUSTAT_IBS,
2388                                         &vcpu->arch.sie_block->cpuflags);
2389                 }
2390                 goto retry;
2391         }
2392
2393         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2394                 if (ibs_enabled(vcpu)) {
2395                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2396                         atomic_andnot(CPUSTAT_IBS,
2397                                           &vcpu->arch.sie_block->cpuflags);
2398                 }
2399                 goto retry;
2400         }
2401
2402         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2403                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2404                 goto retry;
2405         }
2406
2407         /* nothing to do, just clear the request */
2408         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2409
2410         return 0;
2411 }
2412
2413 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2414 {
2415         struct kvm_vcpu *vcpu;
2416         int i;
2417
2418         mutex_lock(&kvm->lock);
2419         preempt_disable();
2420         kvm->arch.epoch = tod - get_tod_clock();
2421         kvm_s390_vcpu_block_all(kvm);
2422         kvm_for_each_vcpu(i, vcpu, kvm)
2423                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2424         kvm_s390_vcpu_unblock_all(kvm);
2425         preempt_enable();
2426         mutex_unlock(&kvm->lock);
2427 }
2428
2429 /**
2430  * kvm_arch_fault_in_page - fault-in guest page if necessary
2431  * @vcpu: The corresponding virtual cpu
2432  * @gpa: Guest physical address
2433  * @writable: Whether the page should be writable or not
2434  *
2435  * Make sure that a guest page has been faulted-in on the host.
2436  *
2437  * Return: Zero on success, negative error code otherwise.
2438  */
2439 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2440 {
2441         return gmap_fault(vcpu->arch.gmap, gpa,
2442                           writable ? FAULT_FLAG_WRITE : 0);
2443 }
2444
2445 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2446                                       unsigned long token)
2447 {
2448         struct kvm_s390_interrupt inti;
2449         struct kvm_s390_irq irq;
2450
2451         if (start_token) {
2452                 irq.u.ext.ext_params2 = token;
2453                 irq.type = KVM_S390_INT_PFAULT_INIT;
2454                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2455         } else {
2456                 inti.type = KVM_S390_INT_PFAULT_DONE;
2457                 inti.parm64 = token;
2458                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2459         }
2460 }
2461
2462 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2463                                      struct kvm_async_pf *work)
2464 {
2465         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2466         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2467 }
2468
2469 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2470                                  struct kvm_async_pf *work)
2471 {
2472         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2473         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2474 }
2475
/*
 * Intentionally empty: on s390 the page is always injected directly, so there
 * is nothing left to do here.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
2481
2482 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2483 {
2484         /*
2485          * s390 will always inject the page directly,
2486          * but we still want check_async_completion to cleanup
2487          */
2488         return true;
2489 }
2490
2491 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2492 {
2493         hva_t hva;
2494         struct kvm_arch_async_pf arch;
2495         int rc;
2496
2497         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2498                 return 0;
2499         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2500             vcpu->arch.pfault_compare)
2501                 return 0;
2502         if (psw_extint_disabled(vcpu))
2503                 return 0;
2504         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2505                 return 0;
2506         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2507                 return 0;
2508         if (!vcpu->arch.gmap->pfault_enabled)
2509                 return 0;
2510
2511         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2512         hva += current->thread.gmap_addr & ~PAGE_MASK;
2513         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2514                 return 0;
2515
2516         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2517         return rc;
2518 }
2519
2520 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2521 {
2522         int rc, cpuflags;
2523
2524         /*
2525          * On s390 notifications for arriving pages will be delivered directly
2526          * to the guest but the house keeping for completed pfaults is
2527          * handled outside the worker.
2528          */
2529         kvm_check_async_pf_completion(vcpu);
2530
2531         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2532         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2533
2534         if (need_resched())
2535                 schedule();
2536
2537         if (test_cpu_flag(CIF_MCCK_PENDING))
2538                 s390_handle_mcck();
2539
2540         if (!kvm_is_ucontrol(vcpu->kvm)) {
2541                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2542                 if (rc)
2543                         return rc;
2544         }
2545
2546         rc = kvm_s390_handle_requests(vcpu);
2547         if (rc)
2548                 return rc;
2549
2550         if (guestdbg_enabled(vcpu)) {
2551                 kvm_s390_backup_guest_per_regs(vcpu);
2552                 kvm_s390_patch_guest_per_regs(vcpu);
2553         }
2554
2555         vcpu->arch.sie_block->icptcode = 0;
2556         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2557         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2558         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2559
2560         return 0;
2561 }
2562
2563 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2564 {
2565         struct kvm_s390_pgm_info pgm_info = {
2566                 .code = PGM_ADDRESSING,
2567         };
2568         u8 opcode, ilen;
2569         int rc;
2570
2571         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2572         trace_kvm_s390_sie_fault(vcpu);
2573
2574         /*
2575          * We want to inject an addressing exception, which is defined as a
2576          * suppressing or terminating exception. However, since we came here
2577          * by a DAT access exception, the PSW still points to the faulting
2578          * instruction since DAT exceptions are nullifying. So we've got
2579          * to look up the current opcode to get the length of the instruction
2580          * to be able to forward the PSW.
2581          */
2582         rc = read_guest_instr(vcpu, &opcode, 1);
2583         ilen = insn_length(opcode);
2584         if (rc < 0) {
2585                 return rc;
2586         } else if (rc) {
2587                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2588                  * Forward by arbitrary ilc, injection will take care of
2589                  * nullification if necessary.
2590                  */
2591                 pgm_info = vcpu->arch.pgm;
2592                 ilen = 4;
2593         }
2594         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2595         kvm_s390_forward_psw(vcpu, ilen);
2596         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2597 }
2598
2599 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2600 {
2601         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2602                    vcpu->arch.sie_block->icptcode);
2603         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2604
2605         if (guestdbg_enabled(vcpu))
2606                 kvm_s390_restore_guest_per_regs(vcpu);
2607
2608         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2609         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2610
2611         if (vcpu->arch.sie_block->icptcode > 0) {
2612                 int rc = kvm_handle_sie_intercept(vcpu);
2613
2614                 if (rc != -EOPNOTSUPP)
2615                         return rc;
2616                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2617                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2618                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2619                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2620                 return -EREMOTE;
2621         } else if (exit_reason != -EFAULT) {
2622                 vcpu->stat.exit_null++;
2623                 return 0;
2624         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2625                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2626                 vcpu->run->s390_ucontrol.trans_exc_code =
2627                                                 current->thread.gmap_addr;
2628                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2629                 return -EREMOTE;
2630         } else if (current->thread.gmap_pfault) {
2631                 trace_kvm_s390_major_guest_pfault(vcpu);
2632                 current->thread.gmap_pfault = 0;
2633                 if (kvm_arch_setup_async_pf(vcpu))
2634                         return 0;
2635                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2636         }
2637         return vcpu_post_run_fault_in_sie(vcpu);
2638 }
2639
2640 static int __vcpu_run(struct kvm_vcpu *vcpu)
2641 {
2642         int rc, exit_reason;
2643
2644         /*
2645          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2646          * ning the guest), so that memslots (and other stuff) are protected
2647          */
2648         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2649
2650         do {
2651                 rc = vcpu_pre_run(vcpu);
2652                 if (rc)
2653                         break;
2654
2655                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2656                 /*
2657                  * As PF_VCPU will be used in fault handler, between
2658                  * guest_enter and guest_exit should be no uaccess.
2659                  */
2660                 local_irq_disable();
2661                 guest_enter_irqoff();
2662                 __disable_cpu_timer_accounting(vcpu);
2663                 local_irq_enable();
2664                 exit_reason = sie64a(vcpu->arch.sie_block,
2665                                      vcpu->run->s.regs.gprs);
2666                 local_irq_disable();
2667                 __enable_cpu_timer_accounting(vcpu);
2668                 guest_exit_irqoff();
2669                 local_irq_enable();
2670                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2671
2672                 rc = vcpu_post_run(vcpu, exit_reason);
2673         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2674
2675         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2676         return rc;
2677 }
2678
2679 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2680 {
2681         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2682         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2683         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2684                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2685         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2686                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2687                 /* some control register changes require a tlb flush */
2688                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2689         }
2690         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2691                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2692                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2693                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2694                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2695                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2696         }
2697         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2698                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2699                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2700                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2701                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2702                         kvm_clear_async_pf_completion_queue(vcpu);
2703         }
2704         /*
2705          * If userspace sets the riccb (e.g. after migration) to a valid state,
2706          * we should enable RI here instead of doing the lazy enablement.
2707          */
2708         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2709             test_kvm_facility(vcpu->kvm, 64)) {
2710                 struct runtime_instr_cb *riccb =
2711                         (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2712
2713                 if (riccb->valid)
2714                         vcpu->arch.sie_block->ecb3 |= 0x01;
2715         }
2716         save_access_regs(vcpu->arch.host_acrs);
2717         restore_access_regs(vcpu->run->s.regs.acrs);
2718         /* save host (userspace) fprs/vrs */
2719         save_fpu_regs();
2720         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
2721         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
2722         if (MACHINE_HAS_VX)
2723                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
2724         else
2725                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
2726         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
2727         if (test_fp_ctl(current->thread.fpu.fpc))
2728                 /* User space provided an invalid FPC, let's clear it */
2729                 current->thread.fpu.fpc = 0;
2730
2731         kvm_run->kvm_dirty_regs = 0;
2732 }
2733
2734 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2735 {
2736         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2737         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2738         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2739         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2740         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2741         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2742         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2743         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2744         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2745         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2746         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2747         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2748         save_access_regs(vcpu->run->s.regs.acrs);
2749         restore_access_regs(vcpu->arch.host_acrs);
2750         /* Save guest register state */
2751         save_fpu_regs();
2752         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2753         /* Restore will be done lazily at return */
2754         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
2755         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
2756
2757 }
2758
2759 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2760 {
2761         int rc;
2762         sigset_t sigsaved;
2763
2764         if (guestdbg_exit_pending(vcpu)) {
2765                 kvm_s390_prepare_debug_exit(vcpu);
2766                 return 0;
2767         }
2768
2769         if (vcpu->sigset_active)
2770                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2771
2772         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2773                 kvm_s390_vcpu_start(vcpu);
2774         } else if (is_vcpu_stopped(vcpu)) {
2775                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2776                                    vcpu->vcpu_id);
2777                 return -EINVAL;
2778         }
2779
2780         sync_regs(vcpu, kvm_run);
2781         enable_cpu_timer_accounting(vcpu);
2782
2783         might_fault();
2784         rc = __vcpu_run(vcpu);
2785
2786         if (signal_pending(current) && !rc) {
2787                 kvm_run->exit_reason = KVM_EXIT_INTR;
2788                 rc = -EINTR;
2789         }
2790
2791         if (guestdbg_exit_pending(vcpu) && !rc)  {
2792                 kvm_s390_prepare_debug_exit(vcpu);
2793                 rc = 0;
2794         }
2795
2796         if (rc == -EREMOTE) {
2797                 /* userspace support is needed, kvm_run has been prepared */
2798                 rc = 0;
2799         }
2800
2801         disable_cpu_timer_accounting(vcpu);
2802         store_regs(vcpu, kvm_run);
2803
2804         if (vcpu->sigset_active)
2805                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2806
2807         vcpu->stat.exit_userspace++;
2808         return rc;
2809 }
2810
2811 /*
2812  * store status at address
2813  * we use have two special cases:
2814  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2815  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2816  */
2817 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2818 {
2819         unsigned char archmode = 1;
2820         freg_t fprs[NUM_FPRS];
2821         unsigned int px;
2822         u64 clkcomp, cputm;
2823         int rc;
2824
2825         px = kvm_s390_get_prefix(vcpu);
2826         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2827                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2828                         return -EFAULT;
2829                 gpa = 0;
2830         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2831                 if (write_guest_real(vcpu, 163, &archmode, 1))
2832                         return -EFAULT;
2833                 gpa = px;
2834         } else
2835                 gpa -= __LC_FPREGS_SAVE_AREA;
2836
2837         /* manually convert vector registers if necessary */
2838         if (MACHINE_HAS_VX) {
2839                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2840                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2841                                      fprs, 128);
2842         } else {
2843                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2844                                      vcpu->run->s.regs.fprs, 128);
2845         }
2846         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2847                               vcpu->run->s.regs.gprs, 128);
2848         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2849                               &vcpu->arch.sie_block->gpsw, 16);
2850         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2851                               &px, 4);
2852         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2853                               &vcpu->run->s.regs.fpc, 4);
2854         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2855                               &vcpu->arch.sie_block->todpr, 4);
2856         cputm = kvm_s390_get_cpu_timer(vcpu);
2857         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2858                               &cputm, 8);
2859         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2860         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2861                               &clkcomp, 8);
2862         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2863                               &vcpu->run->s.regs.acrs, 64);
2864         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2865                               &vcpu->arch.sie_block->gcr, 128);
2866         return rc ? -EFAULT : 0;
2867 }
2868
2869 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2870 {
2871         /*
2872          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2873          * switch in the run ioctl. Let's update our copies before we save
2874          * it into the save area
2875          */
2876         save_fpu_regs();
2877         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2878         save_access_regs(vcpu->run->s.regs.acrs);
2879
2880         return kvm_s390_store_status_unloaded(vcpu, addr);
2881 }
2882
2883 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2884 {
2885         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2886         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2887 }
2888
2889 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2890 {
2891         unsigned int i;
2892         struct kvm_vcpu *vcpu;
2893
2894         kvm_for_each_vcpu(i, vcpu, kvm) {
2895                 __disable_ibs_on_vcpu(vcpu);
2896         }
2897 }
2898
2899 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2900 {
2901         if (!sclp.has_ibs)
2902                 return;
2903         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2904         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2905 }
2906
2907 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2908 {
2909         int i, online_vcpus, started_vcpus = 0;
2910
2911         if (!is_vcpu_stopped(vcpu))
2912                 return;
2913
2914         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2915         /* Only one cpu at a time may enter/leave the STOPPED state. */
2916         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2917         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2918
2919         for (i = 0; i < online_vcpus; i++) {
2920                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2921                         started_vcpus++;
2922         }
2923
2924         if (started_vcpus == 0) {
2925                 /* we're the only active VCPU -> speed it up */
2926                 __enable_ibs_on_vcpu(vcpu);
2927         } else if (started_vcpus == 1) {
2928                 /*
2929                  * As we are starting a second VCPU, we have to disable
2930                  * the IBS facility on all VCPUs to remove potentially
2931                  * oustanding ENABLE requests.
2932                  */
2933                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2934         }
2935
2936         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2937         /*
2938          * Another VCPU might have used IBS while we were offline.
2939          * Let's play safe and flush the VCPU at startup.
2940          */
2941         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2942         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2943         return;
2944 }
2945
2946 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2947 {
2948         int i, online_vcpus, started_vcpus = 0;
2949         struct kvm_vcpu *started_vcpu = NULL;
2950
2951         if (is_vcpu_stopped(vcpu))
2952                 return;
2953
2954         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2955         /* Only one cpu at a time may enter/leave the STOPPED state. */
2956         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2957         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2958
2959         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2960         kvm_s390_clear_stop_irq(vcpu);
2961
2962         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2963         __disable_ibs_on_vcpu(vcpu);
2964
2965         for (i = 0; i < online_vcpus; i++) {
2966                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2967                         started_vcpus++;
2968                         started_vcpu = vcpu->kvm->vcpus[i];
2969                 }
2970         }
2971
2972         if (started_vcpus == 1) {
2973                 /*
2974                  * As we only have one VCPU left, we want to enable the
2975                  * IBS facility for that VCPU to speed it up.
2976                  */
2977                 __enable_ibs_on_vcpu(started_vcpu);
2978         }
2979
2980         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2981         return;
2982 }
2983
2984 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2985                                      struct kvm_enable_cap *cap)
2986 {
2987         int r;
2988
2989         if (cap->flags)
2990                 return -EINVAL;
2991
2992         switch (cap->cap) {
2993         case KVM_CAP_S390_CSS_SUPPORT:
2994                 if (!vcpu->kvm->arch.css_support) {
2995                         vcpu->kvm->arch.css_support = 1;
2996                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2997                         trace_kvm_s390_enable_css(vcpu->kvm);
2998                 }
2999                 r = 0;
3000                 break;
3001         default:
3002                 r = -EINVAL;
3003                 break;
3004         }
3005         return r;
3006 }
3007
3008 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3009                                   struct kvm_s390_mem_op *mop)
3010 {
3011         void __user *uaddr = (void __user *)mop->buf;
3012         void *tmpbuf = NULL;
3013         int r, srcu_idx;
3014         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3015                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3016
3017         if (mop->flags & ~supported_flags)
3018                 return -EINVAL;
3019
3020         if (mop->size > MEM_OP_MAX_SIZE)
3021                 return -E2BIG;
3022
3023         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3024                 tmpbuf = vmalloc(mop->size);
3025                 if (!tmpbuf)
3026                         return -ENOMEM;
3027         }
3028
3029         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3030
3031         switch (mop->op) {
3032         case KVM_S390_MEMOP_LOGICAL_READ:
3033                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3034                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3035                                             mop->size, GACC_FETCH);
3036                         break;
3037                 }
3038                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3039                 if (r == 0) {
3040                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3041                                 r = -EFAULT;
3042                 }
3043                 break;
3044         case KVM_S390_MEMOP_LOGICAL_WRITE:
3045                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3046                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3047                                             mop->size, GACC_STORE);
3048                         break;
3049                 }
3050                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3051                         r = -EFAULT;
3052                         break;
3053                 }
3054                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3055                 break;
3056         default:
3057                 r = -EINVAL;
3058         }
3059
3060         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3061
3062         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3063                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3064
3065         vfree(tmpbuf);
3066         return r;
3067 }
3068
3069 long kvm_arch_vcpu_ioctl(struct file *filp,
3070                          unsigned int ioctl, unsigned long arg)
3071 {
3072         struct kvm_vcpu *vcpu = filp->private_data;
3073         void __user *argp = (void __user *)arg;
3074         int idx;
3075         long r;
3076
3077         switch (ioctl) {
3078         case KVM_S390_IRQ: {
3079                 struct kvm_s390_irq s390irq;
3080
3081                 r = -EFAULT;
3082                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3083                         break;
3084                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3085                 break;
3086         }
3087         case KVM_S390_INTERRUPT: {
3088                 struct kvm_s390_interrupt s390int;
3089                 struct kvm_s390_irq s390irq;
3090
3091                 r = -EFAULT;
3092                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3093                         break;
3094                 if (s390int_to_s390irq(&s390int, &s390irq))
3095                         return -EINVAL;
3096                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3097                 break;
3098         }
3099         case KVM_S390_STORE_STATUS:
3100                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3101                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3102                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3103                 break;
3104         case KVM_S390_SET_INITIAL_PSW: {
3105                 psw_t psw;
3106
3107                 r = -EFAULT;
3108                 if (copy_from_user(&psw, argp, sizeof(psw)))
3109                         break;
3110                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3111                 break;
3112         }
3113         case KVM_S390_INITIAL_RESET:
3114                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3115                 break;
3116         case KVM_SET_ONE_REG:
3117         case KVM_GET_ONE_REG: {
3118                 struct kvm_one_reg reg;
3119                 r = -EFAULT;
3120                 if (copy_from_user(&reg, argp, sizeof(reg)))
3121                         break;
3122                 if (ioctl == KVM_SET_ONE_REG)
3123                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3124                 else
3125                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3126                 break;
3127         }
3128 #ifdef CONFIG_KVM_S390_UCONTROL
3129         case KVM_S390_UCAS_MAP: {
3130                 struct kvm_s390_ucas_mapping ucasmap;
3131
3132                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3133                         r = -EFAULT;
3134                         break;
3135                 }
3136
3137                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3138                         r = -EINVAL;
3139                         break;
3140                 }
3141
3142                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3143                                      ucasmap.vcpu_addr, ucasmap.length);
3144                 break;
3145         }
3146         case KVM_S390_UCAS_UNMAP: {
3147                 struct kvm_s390_ucas_mapping ucasmap;
3148
3149                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3150                         r = -EFAULT;
3151                         break;
3152                 }
3153
3154                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3155                         r = -EINVAL;
3156                         break;
3157                 }
3158
3159                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3160                         ucasmap.length);
3161                 break;
3162         }
3163 #endif
3164         case KVM_S390_VCPU_FAULT: {
3165                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3166                 break;
3167         }
3168         case KVM_ENABLE_CAP:
3169         {
3170                 struct kvm_enable_cap cap;
3171                 r = -EFAULT;
3172                 if (copy_from_user(&cap, argp, sizeof(cap)))
3173                         break;
3174                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3175                 break;
3176         }
3177         case KVM_S390_MEM_OP: {
3178                 struct kvm_s390_mem_op mem_op;
3179
3180                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3181                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3182                 else
3183                         r = -EFAULT;
3184                 break;
3185         }
3186         case KVM_S390_SET_IRQ_STATE: {
3187                 struct kvm_s390_irq_state irq_state;
3188
3189                 r = -EFAULT;
3190                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3191                         break;
3192                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3193                     irq_state.len == 0 ||
3194                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3195                         r = -EINVAL;
3196                         break;
3197                 }
3198                 r = kvm_s390_set_irq_state(vcpu,
3199                                            (void __user *) irq_state.buf,
3200                                            irq_state.len);
3201                 break;
3202         }
3203         case KVM_S390_GET_IRQ_STATE: {
3204                 struct kvm_s390_irq_state irq_state;
3205
3206                 r = -EFAULT;
3207                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3208                         break;
3209                 if (irq_state.len == 0) {
3210                         r = -EINVAL;
3211                         break;
3212                 }
3213                 r = kvm_s390_get_irq_state(vcpu,
3214                                            (__u8 __user *)  irq_state.buf,
3215                                            irq_state.len);
3216                 break;
3217         }
3218         default:
3219                 r = -ENOTTY;
3220         }
3221         return r;
3222 }
3223
3224 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3225 {
3226 #ifdef CONFIG_KVM_S390_UCONTROL
3227         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3228                  && (kvm_is_ucontrol(vcpu->kvm))) {
3229                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3230                 get_page(vmf->page);
3231                 return 0;
3232         }
3233 #endif
3234         return VM_FAULT_SIGBUS;
3235 }
3236
/* Nothing to set up per memslot here; always reports success (0). */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
3242
3243 /* Section: memory related */
3244 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3245                                    struct kvm_memory_slot *memslot,
3246                                    const struct kvm_userspace_memory_region *mem,
3247                                    enum kvm_mr_change change)
3248 {
3249         /* A few sanity checks. We can have memory slots which have to be
3250            located/ended at a segment boundary (1MB). The memory in userland is
3251            ok to be fragmented into various different vmas. It is okay to mmap()
3252            and munmap() stuff in this slot after doing this call at any time */
3253
3254         if (mem->userspace_addr & 0xffffful)
3255                 return -EINVAL;
3256
3257         if (mem->memory_size & 0xffffful)
3258                 return -EINVAL;
3259
3260         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3261                 return -EINVAL;
3262
3263         return 0;
3264 }
3265
3266 void kvm_arch_commit_memory_region(struct kvm *kvm,
3267                                 const struct kvm_userspace_memory_region *mem,
3268                                 const struct kvm_memory_slot *old,
3269                                 const struct kvm_memory_slot *new,
3270                                 enum kvm_mr_change change)
3271 {
3272         int rc;
3273
3274         /* If the basics of the memslot do not change, we do not want
3275          * to update the gmap. Every update causes several unnecessary
3276          * segment translation exceptions. This is usually handled just
3277          * fine by the normal fault handler + gmap, but it will also
3278          * cause faults on the prefix page of running guest CPUs.
3279          */
3280         if (old->userspace_addr == mem->userspace_addr &&
3281             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3282             old->npages * PAGE_SIZE == mem->memory_size)
3283                 return;
3284
3285         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3286                 mem->guest_phys_addr, mem->memory_size);
3287         if (rc)
3288                 pr_warn("failed to commit memory region\n");
3289         return;
3290 }
3291
3292 static inline unsigned long nonhyp_mask(int i)
3293 {
3294         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3295
3296         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3297 }
3298
3299 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3300 {
3301         vcpu->valid_wakeup = false;
3302 }
3303
3304 static int __init kvm_s390_init(void)
3305 {
3306         int i;
3307
3308         if (!sclp.has_sief2) {
3309                 pr_info("SIE not available\n");
3310                 return -ENODEV;
3311         }
3312
3313         for (i = 0; i < 16; i++)
3314                 kvm_s390_fac_list_mask[i] |=
3315                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3316
3317         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3318 }
3319
3320 static void __exit kvm_s390_exit(void)
3321 {
3322         kvm_exit();
3323 }
3324
/* Register kvm_s390_init/kvm_s390_exit as the module's init and exit hooks. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
/* miscdevice.h is pulled in here for MODULE_ALIAS_MISCDEV */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");