arch/s390/kvm/kvm-s390.c
1 /*
2 * hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008, 2009
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
15 */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <asm/asm-offsets.h>
30 #include <asm/lowcore.h>
31 #include <asm/etr.h>
32 #include <asm/pgtable.h>
33 #include <asm/nmi.h>
34 #include <asm/switch_to.h>
35 #include <asm/isc.h>
36 #include <asm/sclp.h>
37 #include "kvm-s390.h"
38 #include "gaccess.h"
39
40 #define KMSG_COMPONENT "kvm-s390"
41 #undef pr_fmt
42 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
43
44 #define CREATE_TRACE_POINTS
45 #include "trace.h"
46 #include "trace-s390.h"
47
48 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
49 #define LOCAL_IRQS 32
50 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
51 (KVM_MAX_VCPUS + LOCAL_IRQS))
52
53 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
54
55 struct kvm_stats_debugfs_item debugfs_entries[] = {
56 { "userspace_handled", VCPU_STAT(exit_userspace) },
57 { "exit_null", VCPU_STAT(exit_null) },
58 { "exit_validity", VCPU_STAT(exit_validity) },
59 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
60 { "exit_external_request", VCPU_STAT(exit_external_request) },
61 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
62 { "exit_instruction", VCPU_STAT(exit_instruction) },
63 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
64 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
65 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
66 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
67 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
68 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
69 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
70 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
71 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
72 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
73 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
74 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
75 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
76 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
77 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
78 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
79 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
80 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
81 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
82 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
83 { "instruction_spx", VCPU_STAT(instruction_spx) },
84 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
85 { "instruction_stap", VCPU_STAT(instruction_stap) },
86 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
87 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
88 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
89 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
90 { "instruction_essa", VCPU_STAT(instruction_essa) },
91 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
92 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
93 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
94 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
95 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
96 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
97 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
98 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
99 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
100 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
101 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
102 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
103 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
104 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
105 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
106 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
107 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
108 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
109 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
110 { "diagnose_10", VCPU_STAT(diagnose_10) },
111 { "diagnose_44", VCPU_STAT(diagnose_44) },
112 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
113 { "diagnose_258", VCPU_STAT(diagnose_258) },
114 { "diagnose_308", VCPU_STAT(diagnose_308) },
115 { "diagnose_500", VCPU_STAT(diagnose_500) },
116 { NULL }
117 };
118
119 /* upper facilities limit for kvm */
120 unsigned long kvm_s390_fac_list_mask[] = {
121 0xffe6fffbfcfdfc40UL,
122 0x005e800000000000UL,
123 };
124
125 unsigned long kvm_s390_fac_list_mask_size(void)
126 {
127 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
128 return ARRAY_SIZE(kvm_s390_fac_list_mask);
129 }
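/*
 * Illustrative note (not part of the original file): each 64-bit word above
 * masks 64 STFLE facility bits. A facility is only presented to guests when
 * its bit is set both here and in the facility list reported by the host;
 * see the fac->mask setup in kvm_arch_init_vm() below.
 */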
130
131 static struct gmap_notifier gmap_notifier;
132 debug_info_t *kvm_s390_dbf;
133
134 /* Section: not file related */
135 int kvm_arch_hardware_enable(void)
136 {
137 /* every s390 is virtualization enabled ;-) */
138 return 0;
139 }
140
141 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
142
143 /*
144 * This callback is executed during stop_machine(). All CPUs are therefore
145 * temporarily stopped. In order not to change guest behavior, we have to
146 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
147 * so a CPU won't be stopped while calculating with the epoch.
148 */
149 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
150 void *v)
151 {
152 struct kvm *kvm;
153 struct kvm_vcpu *vcpu;
154 int i;
155 unsigned long long *delta = v;
156
157 list_for_each_entry(kvm, &vm_list, vm_list) {
158 kvm->arch.epoch -= *delta;
159 kvm_for_each_vcpu(i, vcpu, kvm) {
160 vcpu->arch.sie_block->epoch -= *delta;
161 }
162 }
163 return NOTIFY_OK;
164 }
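/*
 * Illustrative note (not part of the original file): the epoch is the signed
 * difference between guest and host TOD clocks. When an STP sync event
 * shifts the host TOD by *delta, subtracting the same delta from every epoch
 * keeps the TOD value the guest observes unchanged.
 */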
165
166 static struct notifier_block kvm_clock_notifier = {
167 .notifier_call = kvm_clock_sync,
168 };
169
170 int kvm_arch_hardware_setup(void)
171 {
172 gmap_notifier.notifier_call = kvm_gmap_notifier;
173 gmap_register_ipte_notifier(&gmap_notifier);
174 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
175 &kvm_clock_notifier);
176 return 0;
177 }
178
179 void kvm_arch_hardware_unsetup(void)
180 {
181 gmap_unregister_ipte_notifier(&gmap_notifier);
182 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
183 &kvm_clock_notifier);
184 }
185
186 int kvm_arch_init(void *opaque)
187 {
188 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
189 if (!kvm_s390_dbf)
190 return -ENOMEM;
191
192 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
193 debug_unregister(kvm_s390_dbf);
194 return -ENOMEM;
195 }
196
197 /* Register floating interrupt controller interface. */
198 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
199 }
200
201 void kvm_arch_exit(void)
202 {
203 debug_unregister(kvm_s390_dbf);
204 }
205
206 /* Section: device related */
207 long kvm_arch_dev_ioctl(struct file *filp,
208 unsigned int ioctl, unsigned long arg)
209 {
210 if (ioctl == KVM_S390_ENABLE_SIE)
211 return s390_enable_sie();
212 return -EINVAL;
213 }
214
215 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
216 {
217 int r;
218
219 switch (ext) {
220 case KVM_CAP_S390_PSW:
221 case KVM_CAP_S390_GMAP:
222 case KVM_CAP_SYNC_MMU:
223 #ifdef CONFIG_KVM_S390_UCONTROL
224 case KVM_CAP_S390_UCONTROL:
225 #endif
226 case KVM_CAP_ASYNC_PF:
227 case KVM_CAP_SYNC_REGS:
228 case KVM_CAP_ONE_REG:
229 case KVM_CAP_ENABLE_CAP:
230 case KVM_CAP_S390_CSS_SUPPORT:
231 case KVM_CAP_IOEVENTFD:
232 case KVM_CAP_DEVICE_CTRL:
233 case KVM_CAP_ENABLE_CAP_VM:
234 case KVM_CAP_S390_IRQCHIP:
235 case KVM_CAP_VM_ATTRIBUTES:
236 case KVM_CAP_MP_STATE:
237 case KVM_CAP_S390_INJECT_IRQ:
238 case KVM_CAP_S390_USER_SIGP:
239 case KVM_CAP_S390_USER_STSI:
240 case KVM_CAP_S390_SKEYS:
241 case KVM_CAP_S390_IRQ_STATE:
242 r = 1;
243 break;
244 case KVM_CAP_S390_MEM_OP:
245 r = MEM_OP_MAX_SIZE;
246 break;
247 case KVM_CAP_NR_VCPUS:
248 case KVM_CAP_MAX_VCPUS:
249 r = KVM_MAX_VCPUS;
250 break;
251 case KVM_CAP_NR_MEMSLOTS:
252 r = KVM_USER_MEM_SLOTS;
253 break;
254 case KVM_CAP_S390_COW:
255 r = MACHINE_HAS_ESOP;
256 break;
257 case KVM_CAP_S390_VECTOR_REGISTERS:
258 r = MACHINE_HAS_VX;
259 break;
260 default:
261 r = 0;
262 }
263 return r;
264 }
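/*
 * Illustrative only (not part of the original file): userspace probes the
 * extensions handled above with KVM_CHECK_EXTENSION, e.g. on a VM file
 * descriptor obtained from KVM_CREATE_VM:
 *
 *	int max_vcpus = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
 *	int memop_max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *
 * A return value of 0 means "not supported"; positive values carry the
 * limits computed in the switch statement above.
 */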
265
266 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
267 struct kvm_memory_slot *memslot)
268 {
269 gfn_t cur_gfn, last_gfn;
270 unsigned long address;
271 struct gmap *gmap = kvm->arch.gmap;
272
273 down_read(&gmap->mm->mmap_sem);
274 /* Loop over all guest pages */
275 last_gfn = memslot->base_gfn + memslot->npages;
276 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
277 address = gfn_to_hva_memslot(memslot, cur_gfn);
278
279 if (gmap_test_and_clear_dirty(address, gmap))
280 mark_page_dirty(kvm, cur_gfn);
281 }
282 up_read(&gmap->mm->mmap_sem);
283 }
284
285 /* Section: vm related */
286 /*
287 * Get (and clear) the dirty memory log for a memory slot.
288 */
289 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
290 struct kvm_dirty_log *log)
291 {
292 int r;
293 unsigned long n;
294 struct kvm_memslots *slots;
295 struct kvm_memory_slot *memslot;
296 int is_dirty = 0;
297
298 mutex_lock(&kvm->slots_lock);
299
300 r = -EINVAL;
301 if (log->slot >= KVM_USER_MEM_SLOTS)
302 goto out;
303
304 slots = kvm_memslots(kvm);
305 memslot = id_to_memslot(slots, log->slot);
306 r = -ENOENT;
307 if (!memslot->dirty_bitmap)
308 goto out;
309
310 kvm_s390_sync_dirty_log(kvm, memslot);
311 r = kvm_get_dirty_log(kvm, log, &is_dirty);
312 if (r)
313 goto out;
314
315 /* Clear the dirty log */
316 if (is_dirty) {
317 n = kvm_dirty_bitmap_bytes(memslot);
318 memset(memslot->dirty_bitmap, 0, n);
319 }
320 r = 0;
321 out:
322 mutex_unlock(&kvm->slots_lock);
323 return r;
324 }
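/*
 * Illustrative only (not part of the original file): the per-slot dirty
 * bitmap produced above is fetched from userspace with KVM_GET_DIRTY_LOG,
 * roughly like this ("bitmap_buf" must hold one bit per page of the slot):
 *
 *	struct kvm_dirty_log log = {
 *		.slot = 0,
 *		.dirty_bitmap = bitmap_buf,
 *	};
 *	ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 */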
325
326 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
327 {
328 int r;
329
330 if (cap->flags)
331 return -EINVAL;
332
333 switch (cap->cap) {
334 case KVM_CAP_S390_IRQCHIP:
335 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
336 kvm->arch.use_irqchip = 1;
337 r = 0;
338 break;
339 case KVM_CAP_S390_USER_SIGP:
340 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
341 kvm->arch.user_sigp = 1;
342 r = 0;
343 break;
344 case KVM_CAP_S390_VECTOR_REGISTERS:
345 if (MACHINE_HAS_VX) {
346 set_kvm_facility(kvm->arch.model.fac->mask, 129);
347 set_kvm_facility(kvm->arch.model.fac->list, 129);
348 r = 0;
349 } else
350 r = -EINVAL;
351 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
352 r ? "(not available)" : "(success)");
353 break;
354 case KVM_CAP_S390_USER_STSI:
355 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
356 kvm->arch.user_stsi = 1;
357 r = 0;
358 break;
359 default:
360 r = -EINVAL;
361 break;
362 }
363 return r;
364 }
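/*
 * Illustrative only (not part of the original file): the capabilities
 * handled above are switched on with KVM_ENABLE_CAP on the VM file
 * descriptor, e.g. to make the vector facility visible to the guest:
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_S390_VECTOR_REGISTERS,
 *	};
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */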
365
366 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
367 {
368 int ret;
369
370 switch (attr->attr) {
371 case KVM_S390_VM_MEM_LIMIT_SIZE:
372 ret = 0;
373 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
374 kvm->arch.gmap->asce_end);
375 if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
376 ret = -EFAULT;
377 break;
378 default:
379 ret = -ENXIO;
380 break;
381 }
382 return ret;
383 }
384
385 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
386 {
387 int ret;
388 unsigned int idx;
389 switch (attr->attr) {
390 case KVM_S390_VM_MEM_ENABLE_CMMA:
391 /* enable CMMA only for z10 and later (EDAT_1) */
392 ret = -EINVAL;
393 if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
394 break;
395
396 ret = -EBUSY;
397 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
398 mutex_lock(&kvm->lock);
399 if (atomic_read(&kvm->online_vcpus) == 0) {
400 kvm->arch.use_cmma = 1;
401 ret = 0;
402 }
403 mutex_unlock(&kvm->lock);
404 break;
405 case KVM_S390_VM_MEM_CLR_CMMA:
406 ret = -EINVAL;
407 if (!kvm->arch.use_cmma)
408 break;
409
410 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
411 mutex_lock(&kvm->lock);
412 idx = srcu_read_lock(&kvm->srcu);
413 s390_reset_cmma(kvm->arch.gmap->mm);
414 srcu_read_unlock(&kvm->srcu, idx);
415 mutex_unlock(&kvm->lock);
416 ret = 0;
417 break;
418 case KVM_S390_VM_MEM_LIMIT_SIZE: {
419 unsigned long new_limit;
420
421 if (kvm_is_ucontrol(kvm))
422 return -EINVAL;
423
424 if (get_user(new_limit, (u64 __user *)attr->addr))
425 return -EFAULT;
426
427 if (new_limit > kvm->arch.gmap->asce_end)
428 return -E2BIG;
429
430 ret = -EBUSY;
431 mutex_lock(&kvm->lock);
432 if (atomic_read(&kvm->online_vcpus) == 0) {
433 /* gmap_alloc will round the limit up */
434 struct gmap *new = gmap_alloc(current->mm, new_limit);
435
436 if (!new) {
437 ret = -ENOMEM;
438 } else {
439 gmap_free(kvm->arch.gmap);
440 new->private = kvm;
441 kvm->arch.gmap = new;
442 ret = 0;
443 }
444 }
445 mutex_unlock(&kvm->lock);
446 VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
447 break;
448 }
449 default:
450 ret = -ENXIO;
451 break;
452 }
453 return ret;
454 }
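/*
 * Illustrative only (not part of the original file): the memory control
 * attributes above are driven through the device attribute interface, e.g.
 * enabling CMMA before any VCPU has been created:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_ENABLE_CMMA,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */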
455
456 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
457
458 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
459 {
460 struct kvm_vcpu *vcpu;
461 int i;
462
463 if (!test_kvm_facility(kvm, 76))
464 return -EINVAL;
465
466 mutex_lock(&kvm->lock);
467 switch (attr->attr) {
468 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
469 get_random_bytes(
470 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
471 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
472 kvm->arch.crypto.aes_kw = 1;
473 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
474 break;
475 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
476 get_random_bytes(
477 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
478 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
479 kvm->arch.crypto.dea_kw = 1;
480 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
481 break;
482 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
483 kvm->arch.crypto.aes_kw = 0;
484 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
485 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
486 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
487 break;
488 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
489 kvm->arch.crypto.dea_kw = 0;
490 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
491 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
492 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
493 break;
494 default:
495 mutex_unlock(&kvm->lock);
496 return -ENXIO;
497 }
498
499 kvm_for_each_vcpu(i, vcpu, kvm) {
500 kvm_s390_vcpu_crypto_setup(vcpu);
501 exit_sie(vcpu);
502 }
503 mutex_unlock(&kvm->lock);
504 return 0;
505 }
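/*
 * Illustrative only (not part of the original file): key wrapping is toggled
 * the same way, e.g.:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * Each enable request re-seeds the wrapping key mask and kicks all VCPUs out
 * of SIE so the new CRYCB settings take effect on the next entry.
 */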
506
507 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
508 {
509 u8 gtod_high;
510
511 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
512 sizeof(gtod_high)))
513 return -EFAULT;
514
515 if (gtod_high != 0)
516 return -EINVAL;
517 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
518
519 return 0;
520 }
521
522 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
523 {
524 u64 gtod;
525
526 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
527 return -EFAULT;
528
529 kvm_s390_set_tod_clock(kvm, gtod);
530 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
531 return 0;
532 }
533
534 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
535 {
536 int ret;
537
538 if (attr->flags)
539 return -EINVAL;
540
541 switch (attr->attr) {
542 case KVM_S390_VM_TOD_HIGH:
543 ret = kvm_s390_set_tod_high(kvm, attr);
544 break;
545 case KVM_S390_VM_TOD_LOW:
546 ret = kvm_s390_set_tod_low(kvm, attr);
547 break;
548 default:
549 ret = -ENXIO;
550 break;
551 }
552 return ret;
553 }
554
555 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
556 {
557 u8 gtod_high = 0;
558
559 if (copy_to_user((void __user *)attr->addr, &gtod_high,
560 sizeof(gtod_high)))
561 return -EFAULT;
562 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
563
564 return 0;
565 }
566
567 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
568 {
569 u64 gtod;
570
571 gtod = kvm_s390_get_tod_clock_fast(kvm);
572 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
573 return -EFAULT;
574 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
575
576 return 0;
577 }
578
579 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
580 {
581 int ret;
582
583 if (attr->flags)
584 return -EINVAL;
585
586 switch (attr->attr) {
587 case KVM_S390_VM_TOD_HIGH:
588 ret = kvm_s390_get_tod_high(kvm, attr);
589 break;
590 case KVM_S390_VM_TOD_LOW:
591 ret = kvm_s390_get_tod_low(kvm, attr);
592 break;
593 default:
594 ret = -ENXIO;
595 break;
596 }
597 return ret;
598 }
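/*
 * Illustrative only (not part of the original file): the guest TOD clock is
 * read and written through the same attribute interface, with the 64-bit TOD
 * base passed by pointer:
 *
 *	__u64 tod;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64)&tod,
 *	};
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 */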
599
600 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
601 {
602 struct kvm_s390_vm_cpu_processor *proc;
603 int ret = 0;
604
605 mutex_lock(&kvm->lock);
606 if (atomic_read(&kvm->online_vcpus)) {
607 ret = -EBUSY;
608 goto out;
609 }
610 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
611 if (!proc) {
612 ret = -ENOMEM;
613 goto out;
614 }
615 if (!copy_from_user(proc, (void __user *)attr->addr,
616 sizeof(*proc))) {
617 memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
618 sizeof(struct cpuid));
619 kvm->arch.model.ibc = proc->ibc;
620 memcpy(kvm->arch.model.fac->list, proc->fac_list,
621 S390_ARCH_FAC_LIST_SIZE_BYTE);
622 } else
623 ret = -EFAULT;
624 kfree(proc);
625 out:
626 mutex_unlock(&kvm->lock);
627 return ret;
628 }
629
630 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
631 {
632 int ret = -ENXIO;
633
634 switch (attr->attr) {
635 case KVM_S390_VM_CPU_PROCESSOR:
636 ret = kvm_s390_set_processor(kvm, attr);
637 break;
638 }
639 return ret;
640 }
641
642 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
643 {
644 struct kvm_s390_vm_cpu_processor *proc;
645 int ret = 0;
646
647 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
648 if (!proc) {
649 ret = -ENOMEM;
650 goto out;
651 }
652 memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
653 proc->ibc = kvm->arch.model.ibc;
654 memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
655 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
656 ret = -EFAULT;
657 kfree(proc);
658 out:
659 return ret;
660 }
661
662 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
663 {
664 struct kvm_s390_vm_cpu_machine *mach;
665 int ret = 0;
666
667 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
668 if (!mach) {
669 ret = -ENOMEM;
670 goto out;
671 }
672 get_cpu_id((struct cpuid *) &mach->cpuid);
673 mach->ibc = sclp.ibc;
674 memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
675 S390_ARCH_FAC_LIST_SIZE_BYTE);
676 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
677 S390_ARCH_FAC_LIST_SIZE_BYTE);
678 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
679 ret = -EFAULT;
680 kfree(mach);
681 out:
682 return ret;
683 }
684
685 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
686 {
687 int ret = -ENXIO;
688
689 switch (attr->attr) {
690 case KVM_S390_VM_CPU_PROCESSOR:
691 ret = kvm_s390_get_processor(kvm, attr);
692 break;
693 case KVM_S390_VM_CPU_MACHINE:
694 ret = kvm_s390_get_machine(kvm, attr);
695 break;
696 }
697 return ret;
698 }
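/*
 * Illustrative only (not part of the original file): userspace can query the
 * host model exposed by kvm_s390_get_machine() like this:
 *
 *	struct kvm_s390_vm_cpu_machine mach;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_MACHINE,
 *		.addr  = (__u64)&mach,
 *	};
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 */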
699
700 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
701 {
702 int ret;
703
704 switch (attr->group) {
705 case KVM_S390_VM_MEM_CTRL:
706 ret = kvm_s390_set_mem_control(kvm, attr);
707 break;
708 case KVM_S390_VM_TOD:
709 ret = kvm_s390_set_tod(kvm, attr);
710 break;
711 case KVM_S390_VM_CPU_MODEL:
712 ret = kvm_s390_set_cpu_model(kvm, attr);
713 break;
714 case KVM_S390_VM_CRYPTO:
715 ret = kvm_s390_vm_set_crypto(kvm, attr);
716 break;
717 default:
718 ret = -ENXIO;
719 break;
720 }
721
722 return ret;
723 }
724
725 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
726 {
727 int ret;
728
729 switch (attr->group) {
730 case KVM_S390_VM_MEM_CTRL:
731 ret = kvm_s390_get_mem_control(kvm, attr);
732 break;
733 case KVM_S390_VM_TOD:
734 ret = kvm_s390_get_tod(kvm, attr);
735 break;
736 case KVM_S390_VM_CPU_MODEL:
737 ret = kvm_s390_get_cpu_model(kvm, attr);
738 break;
739 default:
740 ret = -ENXIO;
741 break;
742 }
743
744 return ret;
745 }
746
747 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
748 {
749 int ret;
750
751 switch (attr->group) {
752 case KVM_S390_VM_MEM_CTRL:
753 switch (attr->attr) {
754 case KVM_S390_VM_MEM_ENABLE_CMMA:
755 case KVM_S390_VM_MEM_CLR_CMMA:
756 case KVM_S390_VM_MEM_LIMIT_SIZE:
757 ret = 0;
758 break;
759 default:
760 ret = -ENXIO;
761 break;
762 }
763 break;
764 case KVM_S390_VM_TOD:
765 switch (attr->attr) {
766 case KVM_S390_VM_TOD_LOW:
767 case KVM_S390_VM_TOD_HIGH:
768 ret = 0;
769 break;
770 default:
771 ret = -ENXIO;
772 break;
773 }
774 break;
775 case KVM_S390_VM_CPU_MODEL:
776 switch (attr->attr) {
777 case KVM_S390_VM_CPU_PROCESSOR:
778 case KVM_S390_VM_CPU_MACHINE:
779 ret = 0;
780 break;
781 default:
782 ret = -ENXIO;
783 break;
784 }
785 break;
786 case KVM_S390_VM_CRYPTO:
787 switch (attr->attr) {
788 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
789 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
790 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
791 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
792 ret = 0;
793 break;
794 default:
795 ret = -ENXIO;
796 break;
797 }
798 break;
799 default:
800 ret = -ENXIO;
801 break;
802 }
803
804 return ret;
805 }
806
807 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
808 {
809 uint8_t *keys;
810 uint64_t hva;
811 unsigned long curkey;
812 int i, r = 0;
813
814 if (args->flags != 0)
815 return -EINVAL;
816
817 /* Is this guest using storage keys? */
818 if (!mm_use_skey(current->mm))
819 return KVM_S390_GET_SKEYS_NONE;
820
821 /* Enforce sane limit on memory allocation */
822 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
823 return -EINVAL;
824
825 keys = kmalloc_array(args->count, sizeof(uint8_t),
826 GFP_KERNEL | __GFP_NOWARN);
827 if (!keys)
828 keys = vmalloc(sizeof(uint8_t) * args->count);
829 if (!keys)
830 return -ENOMEM;
831
832 for (i = 0; i < args->count; i++) {
833 hva = gfn_to_hva(kvm, args->start_gfn + i);
834 if (kvm_is_error_hva(hva)) {
835 r = -EFAULT;
836 goto out;
837 }
838
839 curkey = get_guest_storage_key(current->mm, hva);
840 if (IS_ERR_VALUE(curkey)) {
841 r = curkey;
842 goto out;
843 }
844 keys[i] = curkey;
845 }
846
847 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
848 sizeof(uint8_t) * args->count);
849 if (r)
850 r = -EFAULT;
851 out:
852 kvfree(keys);
853 return r;
854 }
855
856 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
857 {
858 uint8_t *keys;
859 uint64_t hva;
860 int i, r = 0;
861
862 if (args->flags != 0)
863 return -EINVAL;
864
865 /* Enforce sane limit on memory allocation */
866 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
867 return -EINVAL;
868
869 keys = kmalloc_array(args->count, sizeof(uint8_t),
870 GFP_KERNEL | __GFP_NOWARN);
871 if (!keys)
872 keys = vmalloc(sizeof(uint8_t) * args->count);
873 if (!keys)
874 return -ENOMEM;
875
876 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
877 sizeof(uint8_t) * args->count);
878 if (r) {
879 r = -EFAULT;
880 goto out;
881 }
882
883 /* Enable storage key handling for the guest */
884 r = s390_enable_skey();
885 if (r)
886 goto out;
887
888 for (i = 0; i < args->count; i++) {
889 hva = gfn_to_hva(kvm, args->start_gfn + i);
890 if (kvm_is_error_hva(hva)) {
891 r = -EFAULT;
892 goto out;
893 }
894
895 /* Lowest order bit is reserved */
896 if (keys[i] & 0x01) {
897 r = -EINVAL;
898 goto out;
899 }
900
901 r = set_guest_storage_key(current->mm, hva,
902 (unsigned long)keys[i], 0);
903 if (r)
904 goto out;
905 }
906 out:
907 kvfree(keys);
908 return r;
909 }
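/*
 * Illustrative only (not part of the original file): the storage key ioctls
 * above transfer one key byte per guest page. A migration tool would read
 * them roughly like this:
 *
 *	uint8_t keys[256];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 256,
 *		.skeydata_addr = (__u64)keys,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *
 * A return value of KVM_S390_GET_SKEYS_NONE indicates that the guest does
 * not use storage keys at all.
 */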
910
911 long kvm_arch_vm_ioctl(struct file *filp,
912 unsigned int ioctl, unsigned long arg)
913 {
914 struct kvm *kvm = filp->private_data;
915 void __user *argp = (void __user *)arg;
916 struct kvm_device_attr attr;
917 int r;
918
919 switch (ioctl) {
920 case KVM_S390_INTERRUPT: {
921 struct kvm_s390_interrupt s390int;
922
923 r = -EFAULT;
924 if (copy_from_user(&s390int, argp, sizeof(s390int)))
925 break;
926 r = kvm_s390_inject_vm(kvm, &s390int);
927 break;
928 }
929 case KVM_ENABLE_CAP: {
930 struct kvm_enable_cap cap;
931 r = -EFAULT;
932 if (copy_from_user(&cap, argp, sizeof(cap)))
933 break;
934 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
935 break;
936 }
937 case KVM_CREATE_IRQCHIP: {
938 struct kvm_irq_routing_entry routing;
939
940 r = -EINVAL;
941 if (kvm->arch.use_irqchip) {
942 /* Set up dummy routing. */
943 memset(&routing, 0, sizeof(routing));
944 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
945 }
946 break;
947 }
948 case KVM_SET_DEVICE_ATTR: {
949 r = -EFAULT;
950 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
951 break;
952 r = kvm_s390_vm_set_attr(kvm, &attr);
953 break;
954 }
955 case KVM_GET_DEVICE_ATTR: {
956 r = -EFAULT;
957 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
958 break;
959 r = kvm_s390_vm_get_attr(kvm, &attr);
960 break;
961 }
962 case KVM_HAS_DEVICE_ATTR: {
963 r = -EFAULT;
964 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
965 break;
966 r = kvm_s390_vm_has_attr(kvm, &attr);
967 break;
968 }
969 case KVM_S390_GET_SKEYS: {
970 struct kvm_s390_skeys args;
971
972 r = -EFAULT;
973 if (copy_from_user(&args, argp,
974 sizeof(struct kvm_s390_skeys)))
975 break;
976 r = kvm_s390_get_skeys(kvm, &args);
977 break;
978 }
979 case KVM_S390_SET_SKEYS: {
980 struct kvm_s390_skeys args;
981
982 r = -EFAULT;
983 if (copy_from_user(&args, argp,
984 sizeof(struct kvm_s390_skeys)))
985 break;
986 r = kvm_s390_set_skeys(kvm, &args);
987 break;
988 }
989 default:
990 r = -ENOTTY;
991 }
992
993 return r;
994 }
995
996 static int kvm_s390_query_ap_config(u8 *config)
997 {
998 u32 fcn_code = 0x04000000UL;
999 u32 cc = 0;
1000
1001 memset(config, 0, 128);
1002 asm volatile(
1003 "lgr 0,%1\n"
1004 "lgr 2,%2\n"
1005 ".long 0xb2af0000\n" /* PQAP(QCI) */
1006 "0: ipm %0\n"
1007 "srl %0,28\n"
1008 "1:\n"
1009 EX_TABLE(0b, 1b)
1010 : "+r" (cc)
1011 : "r" (fcn_code), "r" (config)
1012 : "cc", "0", "2", "memory"
1013 );
1014
1015 return cc;
1016 }
1017
1018 static int kvm_s390_apxa_installed(void)
1019 {
1020 u8 config[128];
1021 int cc;
1022
1023 if (test_facility(2) && test_facility(12)) {
1024 cc = kvm_s390_query_ap_config(config);
1025
1026 if (cc)
1027 pr_err("PQAP(QCI) failed with cc=%d", cc);
1028 else
1029 return config[0] & 0x40;
1030 }
1031
1032 return 0;
1033 }
1034
1035 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1036 {
1037 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1038
1039 if (kvm_s390_apxa_installed())
1040 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1041 else
1042 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1043 }
1044
1045 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1046 {
1047 get_cpu_id(cpu_id);
1048 cpu_id->version = 0xff;
1049 }
1050
1051 static int kvm_s390_crypto_init(struct kvm *kvm)
1052 {
1053 if (!test_kvm_facility(kvm, 76))
1054 return 0;
1055
1056 kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
1057 GFP_KERNEL | GFP_DMA);
1058 if (!kvm->arch.crypto.crycb)
1059 return -ENOMEM;
1060
1061 kvm_s390_set_crycb_format(kvm);
1062
1063 /* Enable AES/DEA protected key functions by default */
1064 kvm->arch.crypto.aes_kw = 1;
1065 kvm->arch.crypto.dea_kw = 1;
1066 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1067 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1068 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1069 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1070
1071 return 0;
1072 }
1073
1074 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1075 {
1076 int i, rc;
1077 char debug_name[16];
1078 static unsigned long sca_offset;
1079
1080 rc = -EINVAL;
1081 #ifdef CONFIG_KVM_S390_UCONTROL
1082 if (type & ~KVM_VM_S390_UCONTROL)
1083 goto out_err;
1084 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1085 goto out_err;
1086 #else
1087 if (type)
1088 goto out_err;
1089 #endif
1090
1091 rc = s390_enable_sie();
1092 if (rc)
1093 goto out_err;
1094
1095 rc = -ENOMEM;
1096
1097 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
1098 if (!kvm->arch.sca)
1099 goto out_err;
1100 spin_lock(&kvm_lock);
1101 sca_offset += 16;
1102 if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE)
1103 sca_offset = 0;
1104 kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
1105 spin_unlock(&kvm_lock);
1106
1107 sprintf(debug_name, "kvm-%u", current->pid);
1108
1109 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1110 if (!kvm->arch.dbf)
1111 goto out_err;
1112
1113 /*
1114 * The architectural maximum amount of facilities is 16 kbit. To store
1115 * this amount, 2 kbyte of memory is required. Thus we need a full
1116 * page to hold the guest facility list (arch.model.fac->list) and the
1117 * facility mask (arch.model.fac->mask). Its address size has to be
1118 * 31 bits and word aligned.
1119 */
1120 kvm->arch.model.fac =
1121 (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1122 if (!kvm->arch.model.fac)
1123 goto out_err;
1124
1125 /* Populate the facility mask initially. */
1126 memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1127 S390_ARCH_FAC_LIST_SIZE_BYTE);
1128 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1129 if (i < kvm_s390_fac_list_mask_size())
1130 kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1131 else
1132 kvm->arch.model.fac->mask[i] = 0UL;
1133 }
1134
1135 /* Populate the facility list initially. */
1136 memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1137 S390_ARCH_FAC_LIST_SIZE_BYTE);
1138
1139 kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1140 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1141
1142 if (kvm_s390_crypto_init(kvm) < 0)
1143 goto out_err;
1144
1145 spin_lock_init(&kvm->arch.float_int.lock);
1146 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1147 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1148 init_waitqueue_head(&kvm->arch.ipte_wq);
1149 mutex_init(&kvm->arch.ipte_mutex);
1150
1151 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1152 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1153
1154 if (type & KVM_VM_S390_UCONTROL) {
1155 kvm->arch.gmap = NULL;
1156 } else {
1157 kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
1158 if (!kvm->arch.gmap)
1159 goto out_err;
1160 kvm->arch.gmap->private = kvm;
1161 kvm->arch.gmap->pfault_enabled = 0;
1162 }
1163
1164 kvm->arch.css_support = 0;
1165 kvm->arch.use_irqchip = 0;
1166 kvm->arch.epoch = 0;
1167
1168 spin_lock_init(&kvm->arch.start_stop_lock);
1169 KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
1170
1171 return 0;
1172 out_err:
1173 kfree(kvm->arch.crypto.crycb);
1174 free_page((unsigned long)kvm->arch.model.fac);
1175 debug_unregister(kvm->arch.dbf);
1176 free_page((unsigned long)(kvm->arch.sca));
1177 KVM_EVENT(3, "creation of vm failed: %d", rc);
1178 return rc;
1179 }
1180
1181 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1182 {
1183 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1184 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1185 kvm_s390_clear_local_irqs(vcpu);
1186 kvm_clear_async_pf_completion_queue(vcpu);
1187 if (!kvm_is_ucontrol(vcpu->kvm)) {
1188 clear_bit(63 - vcpu->vcpu_id,
1189 (unsigned long *) &vcpu->kvm->arch.sca->mcn);
1190 if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
1191 (__u64) vcpu->arch.sie_block)
1192 vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
1193 }
1194 smp_mb();
1195
1196 if (kvm_is_ucontrol(vcpu->kvm))
1197 gmap_free(vcpu->arch.gmap);
1198
1199 if (vcpu->kvm->arch.use_cmma)
1200 kvm_s390_vcpu_unsetup_cmma(vcpu);
1201 free_page((unsigned long)(vcpu->arch.sie_block));
1202
1203 kvm_vcpu_uninit(vcpu);
1204 kmem_cache_free(kvm_vcpu_cache, vcpu);
1205 }
1206
1207 static void kvm_free_vcpus(struct kvm *kvm)
1208 {
1209 unsigned int i;
1210 struct kvm_vcpu *vcpu;
1211
1212 kvm_for_each_vcpu(i, vcpu, kvm)
1213 kvm_arch_vcpu_destroy(vcpu);
1214
1215 mutex_lock(&kvm->lock);
1216 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1217 kvm->vcpus[i] = NULL;
1218
1219 atomic_set(&kvm->online_vcpus, 0);
1220 mutex_unlock(&kvm->lock);
1221 }
1222
1223 void kvm_arch_destroy_vm(struct kvm *kvm)
1224 {
1225 kvm_free_vcpus(kvm);
1226 free_page((unsigned long)kvm->arch.model.fac);
1227 free_page((unsigned long)(kvm->arch.sca));
1228 debug_unregister(kvm->arch.dbf);
1229 kfree(kvm->arch.crypto.crycb);
1230 if (!kvm_is_ucontrol(kvm))
1231 gmap_free(kvm->arch.gmap);
1232 kvm_s390_destroy_adapters(kvm);
1233 kvm_s390_clear_float_irqs(kvm);
1234 KVM_EVENT(3, "vm 0x%p destroyed", kvm);
1235 }
1236
1237 /* Section: vcpu related */
1238 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1239 {
1240 vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1241 if (!vcpu->arch.gmap)
1242 return -ENOMEM;
1243 vcpu->arch.gmap->private = vcpu->kvm;
1244
1245 return 0;
1246 }
1247
1248 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1249 {
1250 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1251 kvm_clear_async_pf_completion_queue(vcpu);
1252 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1253 KVM_SYNC_GPRS |
1254 KVM_SYNC_ACRS |
1255 KVM_SYNC_CRS |
1256 KVM_SYNC_ARCH0 |
1257 KVM_SYNC_PFAULT;
1258 if (test_kvm_facility(vcpu->kvm, 129))
1259 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1260
1261 if (kvm_is_ucontrol(vcpu->kvm))
1262 return __kvm_ucontrol_vcpu_init(vcpu);
1263
1264 return 0;
1265 }
1266
1267 /*
1268 * Backs up the current FP/VX register save area on a particular
1269 * destination. Used to switch between different register save
1270 * areas.
1271 */
1272 static inline void save_fpu_to(struct fpu *dst)
1273 {
1274 dst->fpc = current->thread.fpu.fpc;
1275 dst->regs = current->thread.fpu.regs;
1276 }
1277
1278 /*
1279 * Switches the FP/VX register save area from which to lazy
1280 * restore register contents.
1281 */
1282 static inline void load_fpu_from(struct fpu *from)
1283 {
1284 current->thread.fpu.fpc = from->fpc;
1285 current->thread.fpu.regs = from->regs;
1286 }
1287
1288 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1289 {
1290 /* Save host register state */
1291 save_fpu_regs();
1292 save_fpu_to(&vcpu->arch.host_fpregs);
1293
1294 if (test_kvm_facility(vcpu->kvm, 129)) {
1295 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1296 /*
1297 * Use the register save area in the SIE-control block
1298 * for register restore and save in kvm_arch_vcpu_put()
1299 */
1300 current->thread.fpu.vxrs =
1301 (__vector128 *)&vcpu->run->s.regs.vrs;
1302 } else
1303 load_fpu_from(&vcpu->arch.guest_fpregs);
1304
1305 if (test_fp_ctl(current->thread.fpu.fpc))
1306 /* User space provided an invalid FPC, let's clear it */
1307 current->thread.fpu.fpc = 0;
1308
1309 save_access_regs(vcpu->arch.host_acrs);
1310 restore_access_regs(vcpu->run->s.regs.acrs);
1311 gmap_enable(vcpu->arch.gmap);
1312 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1313 }
1314
1315 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1316 {
1317 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1318 gmap_disable(vcpu->arch.gmap);
1319
1320 save_fpu_regs();
1321
1322 if (test_kvm_facility(vcpu->kvm, 129))
1323 /*
1324 * kvm_arch_vcpu_load() set up the register save area to
1325 * the &vcpu->run->s.regs.vrs and, thus, the vector registers
1326 * are already saved. Only the floating-point control must be
1327 * copied.
1328 */
1329 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1330 else
1331 save_fpu_to(&vcpu->arch.guest_fpregs);
1332 load_fpu_from(&vcpu->arch.host_fpregs);
1333
1334 save_access_regs(vcpu->run->s.regs.acrs);
1335 restore_access_regs(vcpu->arch.host_acrs);
1336 }
1337
1338 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1339 {
1340 /* this equals initial cpu reset in pop, but we don't switch to ESA */
1341 vcpu->arch.sie_block->gpsw.mask = 0UL;
1342 vcpu->arch.sie_block->gpsw.addr = 0UL;
1343 kvm_s390_set_prefix(vcpu, 0);
1344 vcpu->arch.sie_block->cputm = 0UL;
1345 vcpu->arch.sie_block->ckc = 0UL;
1346 vcpu->arch.sie_block->todpr = 0;
1347 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1348 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
1349 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1350 vcpu->arch.guest_fpregs.fpc = 0;
1351 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
1352 vcpu->arch.sie_block->gbea = 1;
1353 vcpu->arch.sie_block->pp = 0;
1354 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1355 kvm_clear_async_pf_completion_queue(vcpu);
1356 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1357 kvm_s390_vcpu_stop(vcpu);
1358 kvm_s390_clear_local_irqs(vcpu);
1359 }
1360
1361 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1362 {
1363 mutex_lock(&vcpu->kvm->lock);
1364 preempt_disable();
1365 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1366 preempt_enable();
1367 mutex_unlock(&vcpu->kvm->lock);
1368 if (!kvm_is_ucontrol(vcpu->kvm))
1369 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1370 }
1371
1372 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1373 {
1374 if (!test_kvm_facility(vcpu->kvm, 76))
1375 return;
1376
1377 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1378
1379 if (vcpu->kvm->arch.crypto.aes_kw)
1380 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1381 if (vcpu->kvm->arch.crypto.dea_kw)
1382 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1383
1384 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1385 }
1386
1387 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1388 {
1389 free_page(vcpu->arch.sie_block->cbrlo);
1390 vcpu->arch.sie_block->cbrlo = 0;
1391 }
1392
1393 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1394 {
1395 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1396 if (!vcpu->arch.sie_block->cbrlo)
1397 return -ENOMEM;
1398
1399 vcpu->arch.sie_block->ecb2 |= 0x80;
1400 vcpu->arch.sie_block->ecb2 &= ~0x08;
1401 return 0;
1402 }
1403
1404 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1405 {
1406 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1407
1408 vcpu->arch.cpu_id = model->cpu_id;
1409 vcpu->arch.sie_block->ibc = model->ibc;
1410 vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1411 }
1412
1413 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1414 {
1415 int rc = 0;
1416
1417 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1418 CPUSTAT_SM |
1419 CPUSTAT_STOPPED);
1420
1421 if (test_kvm_facility(vcpu->kvm, 78))
1422 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1423 else if (test_kvm_facility(vcpu->kvm, 8))
1424 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1425
1426 kvm_s390_vcpu_setup_model(vcpu);
1427
1428 vcpu->arch.sie_block->ecb = 6;
1429 if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1430 vcpu->arch.sie_block->ecb |= 0x10;
1431
1432 vcpu->arch.sie_block->ecb2 = 8;
1433 vcpu->arch.sie_block->eca = 0xC1002000U;
1434 if (sclp.has_siif)
1435 vcpu->arch.sie_block->eca |= 1;
1436 if (sclp.has_sigpif)
1437 vcpu->arch.sie_block->eca |= 0x10000000U;
1438 if (test_kvm_facility(vcpu->kvm, 129)) {
1439 vcpu->arch.sie_block->eca |= 0x00020000;
1440 vcpu->arch.sie_block->ecd |= 0x20000000;
1441 }
1442 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1443
1444 if (vcpu->kvm->arch.use_cmma) {
1445 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1446 if (rc)
1447 return rc;
1448 }
1449 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1450 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1451
1452 kvm_s390_vcpu_crypto_setup(vcpu);
1453
1454 return rc;
1455 }
1456
1457 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1458 unsigned int id)
1459 {
1460 struct kvm_vcpu *vcpu;
1461 struct sie_page *sie_page;
1462 int rc = -EINVAL;
1463
1464 if (id >= KVM_MAX_VCPUS)
1465 goto out;
1466
1467 rc = -ENOMEM;
1468
1469 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1470 if (!vcpu)
1471 goto out;
1472
1473 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1474 if (!sie_page)
1475 goto out_free_cpu;
1476
1477 vcpu->arch.sie_block = &sie_page->sie_block;
1478 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1479
1480 vcpu->arch.sie_block->icpua = id;
1481 if (!kvm_is_ucontrol(kvm)) {
1482 if (!kvm->arch.sca) {
1483 WARN_ON_ONCE(1);
1484 goto out_free_cpu;
1485 }
1486 if (!kvm->arch.sca->cpu[id].sda)
1487 kvm->arch.sca->cpu[id].sda =
1488 (__u64) vcpu->arch.sie_block;
1489 vcpu->arch.sie_block->scaoh =
1490 (__u32)(((__u64)kvm->arch.sca) >> 32);
1491 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
1492 set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
1493 }
1494
1495 spin_lock_init(&vcpu->arch.local_int.lock);
1496 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1497 vcpu->arch.local_int.wq = &vcpu->wq;
1498 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1499
1500 /*
1501 * Allocate a save area for floating-point registers. If the vector
1502 * extension is available, register contents are saved in the SIE
1503 * control block. The allocated save area is still required in
1504 * particular places, for example, in kvm_s390_vcpu_store_status().
1505 */
1506 vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
1507 GFP_KERNEL);
1508 if (!vcpu->arch.guest_fpregs.fprs) {
1509 rc = -ENOMEM;
1510 goto out_free_sie_block;
1511 }
1512
1513 rc = kvm_vcpu_init(vcpu, kvm, id);
1514 if (rc)
1515 goto out_free_sie_block;
1516 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
1517 vcpu->arch.sie_block);
1518 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1519
1520 return vcpu;
1521 out_free_sie_block:
1522 free_page((unsigned long)(vcpu->arch.sie_block));
1523 out_free_cpu:
1524 kmem_cache_free(kvm_vcpu_cache, vcpu);
1525 out:
1526 return ERR_PTR(rc);
1527 }
1528
1529 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1530 {
1531 return kvm_s390_vcpu_has_irq(vcpu, 0);
1532 }
1533
1534 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1535 {
1536 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1537 exit_sie(vcpu);
1538 }
1539
1540 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1541 {
1542 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1543 }
1544
1545 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1546 {
1547 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1548 exit_sie(vcpu);
1549 }
1550
1551 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1552 {
1553 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1554 }
1555
1556 /*
1557 * Kick a guest cpu out of SIE and wait until SIE is not running.
1558 * If the CPU is not running (e.g. waiting as idle) the function will
1559 * return immediately. */
1560 void exit_sie(struct kvm_vcpu *vcpu)
1561 {
1562 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1563 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1564 cpu_relax();
1565 }
1566
1567 /* Kick a guest cpu out of SIE to process a request synchronously */
1568 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1569 {
1570 kvm_make_request(req, vcpu);
1571 kvm_s390_vcpu_request(vcpu);
1572 }
1573
1574 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1575 {
1576 int i;
1577 struct kvm *kvm = gmap->private;
1578 struct kvm_vcpu *vcpu;
1579
1580 kvm_for_each_vcpu(i, vcpu, kvm) {
1581 /* match against both prefix pages */
1582 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1583 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1584 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1585 }
1586 }
1587 }
1588
1589 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1590 {
1591 /* kvm common code refers to this, but never calls it */
1592 BUG();
1593 return 0;
1594 }
1595
1596 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1597 struct kvm_one_reg *reg)
1598 {
1599 int r = -EINVAL;
1600
1601 switch (reg->id) {
1602 case KVM_REG_S390_TODPR:
1603 r = put_user(vcpu->arch.sie_block->todpr,
1604 (u32 __user *)reg->addr);
1605 break;
1606 case KVM_REG_S390_EPOCHDIFF:
1607 r = put_user(vcpu->arch.sie_block->epoch,
1608 (u64 __user *)reg->addr);
1609 break;
1610 case KVM_REG_S390_CPU_TIMER:
1611 r = put_user(vcpu->arch.sie_block->cputm,
1612 (u64 __user *)reg->addr);
1613 break;
1614 case KVM_REG_S390_CLOCK_COMP:
1615 r = put_user(vcpu->arch.sie_block->ckc,
1616 (u64 __user *)reg->addr);
1617 break;
1618 case KVM_REG_S390_PFTOKEN:
1619 r = put_user(vcpu->arch.pfault_token,
1620 (u64 __user *)reg->addr);
1621 break;
1622 case KVM_REG_S390_PFCOMPARE:
1623 r = put_user(vcpu->arch.pfault_compare,
1624 (u64 __user *)reg->addr);
1625 break;
1626 case KVM_REG_S390_PFSELECT:
1627 r = put_user(vcpu->arch.pfault_select,
1628 (u64 __user *)reg->addr);
1629 break;
1630 case KVM_REG_S390_PP:
1631 r = put_user(vcpu->arch.sie_block->pp,
1632 (u64 __user *)reg->addr);
1633 break;
1634 case KVM_REG_S390_GBEA:
1635 r = put_user(vcpu->arch.sie_block->gbea,
1636 (u64 __user *)reg->addr);
1637 break;
1638 default:
1639 break;
1640 }
1641
1642 return r;
1643 }
1644
1645 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1646 struct kvm_one_reg *reg)
1647 {
1648 int r = -EINVAL;
1649
1650 switch (reg->id) {
1651 case KVM_REG_S390_TODPR:
1652 r = get_user(vcpu->arch.sie_block->todpr,
1653 (u32 __user *)reg->addr);
1654 break;
1655 case KVM_REG_S390_EPOCHDIFF:
1656 r = get_user(vcpu->arch.sie_block->epoch,
1657 (u64 __user *)reg->addr);
1658 break;
1659 case KVM_REG_S390_CPU_TIMER:
1660 r = get_user(vcpu->arch.sie_block->cputm,
1661 (u64 __user *)reg->addr);
1662 break;
1663 case KVM_REG_S390_CLOCK_COMP:
1664 r = get_user(vcpu->arch.sie_block->ckc,
1665 (u64 __user *)reg->addr);
1666 break;
1667 case KVM_REG_S390_PFTOKEN:
1668 r = get_user(vcpu->arch.pfault_token,
1669 (u64 __user *)reg->addr);
1670 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1671 kvm_clear_async_pf_completion_queue(vcpu);
1672 break;
1673 case KVM_REG_S390_PFCOMPARE:
1674 r = get_user(vcpu->arch.pfault_compare,
1675 (u64 __user *)reg->addr);
1676 break;
1677 case KVM_REG_S390_PFSELECT:
1678 r = get_user(vcpu->arch.pfault_select,
1679 (u64 __user *)reg->addr);
1680 break;
1681 case KVM_REG_S390_PP:
1682 r = get_user(vcpu->arch.sie_block->pp,
1683 (u64 __user *)reg->addr);
1684 break;
1685 case KVM_REG_S390_GBEA:
1686 r = get_user(vcpu->arch.sie_block->gbea,
1687 (u64 __user *)reg->addr);
1688 break;
1689 default:
1690 break;
1691 }
1692
1693 return r;
1694 }
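/*
 * Illustrative only (not part of the original file): the ONE_REG registers
 * above are accessed per VCPU, e.g. reading the guest CPU timer:
 *
 *	__u64 cputm;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&cputm,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */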
1695
1696 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1697 {
1698 kvm_s390_vcpu_initial_reset(vcpu);
1699 return 0;
1700 }
1701
1702 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1703 {
1704 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1705 return 0;
1706 }
1707
1708 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1709 {
1710 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1711 return 0;
1712 }
1713
1714 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1715 struct kvm_sregs *sregs)
1716 {
1717 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1718 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1719 restore_access_regs(vcpu->run->s.regs.acrs);
1720 return 0;
1721 }
1722
1723 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1724 struct kvm_sregs *sregs)
1725 {
1726 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1727 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1728 return 0;
1729 }
1730
1731 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1732 {
1733 if (test_fp_ctl(fpu->fpc))
1734 return -EINVAL;
1735 memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
1736 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
1737 save_fpu_regs();
1738 load_fpu_from(&vcpu->arch.guest_fpregs);
1739 return 0;
1740 }
1741
1742 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1743 {
1744 memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
1745 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
1746 return 0;
1747 }
1748
1749 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1750 {
1751 int rc = 0;
1752
1753 if (!is_vcpu_stopped(vcpu))
1754 rc = -EBUSY;
1755 else {
1756 vcpu->run->psw_mask = psw.mask;
1757 vcpu->run->psw_addr = psw.addr;
1758 }
1759 return rc;
1760 }
1761
1762 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1763 struct kvm_translation *tr)
1764 {
1765 return -EINVAL; /* not implemented yet */
1766 }
1767
1768 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1769 KVM_GUESTDBG_USE_HW_BP | \
1770 KVM_GUESTDBG_ENABLE)
1771
1772 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1773 struct kvm_guest_debug *dbg)
1774 {
1775 int rc = 0;
1776
1777 vcpu->guest_debug = 0;
1778 kvm_s390_clear_bp_data(vcpu);
1779
1780 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1781 return -EINVAL;
1782
1783 if (dbg->control & KVM_GUESTDBG_ENABLE) {
1784 vcpu->guest_debug = dbg->control;
1785 /* enforce guest PER */
1786 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1787
1788 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1789 rc = kvm_s390_import_bp_data(vcpu, dbg);
1790 } else {
1791 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1792 vcpu->arch.guestdbg.last_bp = 0;
1793 }
1794
1795 if (rc) {
1796 vcpu->guest_debug = 0;
1797 kvm_s390_clear_bp_data(vcpu);
1798 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1799 }
1800
1801 return rc;
1802 }
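/*
 * Illustrative only (not part of the original file): a debugger front end
 * enables PER-based hardware breakpoints roughly like this, with arch.hw_bp
 * pointing to an array of struct kvm_hw_breakpoint filled in by the caller:
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
 *	};
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 */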
1803
1804 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1805 struct kvm_mp_state *mp_state)
1806 {
1807 /* CHECK_STOP and LOAD are not supported yet */
1808 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1809 KVM_MP_STATE_OPERATING;
1810 }
1811
1812 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1813 struct kvm_mp_state *mp_state)
1814 {
1815 int rc = 0;
1816
1817 /* user space knows about this interface - let it control the state */
1818 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1819
1820 switch (mp_state->mp_state) {
1821 case KVM_MP_STATE_STOPPED:
1822 kvm_s390_vcpu_stop(vcpu);
1823 break;
1824 case KVM_MP_STATE_OPERATING:
1825 kvm_s390_vcpu_start(vcpu);
1826 break;
1827 case KVM_MP_STATE_LOAD:
1828 case KVM_MP_STATE_CHECK_STOP:
1829 /* fall through - CHECK_STOP and LOAD are not supported yet */
1830 default:
1831 rc = -ENXIO;
1832 }
1833
1834 return rc;
1835 }
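/*
 * Illustrative only (not part of the original file): userspace drives the
 * stopped/operating state with KVM_SET_MP_STATE, e.g. to stop a VCPU:
 *
 *	struct kvm_mp_state st = { .mp_state = KVM_MP_STATE_STOPPED };
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &st);
 *
 * Note that the first such call switches the VM to user-controlled CPU state
 * (user_cpu_state_ctrl above).
 */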
1836
1837 static bool ibs_enabled(struct kvm_vcpu *vcpu)
1838 {
1839 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1840 }
1841
1842 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1843 {
1844 retry:
1845 kvm_s390_vcpu_request_handled(vcpu);
1846 if (!vcpu->requests)
1847 return 0;
1848 /*
1849 * We use MMU_RELOAD just to re-arm the ipte notifier for the
1850 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1851 * This ensures that the ipte instruction for this request has
1852 * already finished. We might race against a second unmapper that
1853 * wants to set the blocking bit. Lets just retry the request loop.
1854 */
1855 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1856 int rc;
1857 rc = gmap_ipte_notify(vcpu->arch.gmap,
1858 kvm_s390_get_prefix(vcpu),
1859 PAGE_SIZE * 2);
1860 if (rc)
1861 return rc;
1862 goto retry;
1863 }
1864
1865 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1866 vcpu->arch.sie_block->ihcpu = 0xffff;
1867 goto retry;
1868 }
1869
1870 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1871 if (!ibs_enabled(vcpu)) {
1872 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1873 atomic_or(CPUSTAT_IBS,
1874 &vcpu->arch.sie_block->cpuflags);
1875 }
1876 goto retry;
1877 }
1878
1879 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
1880 if (ibs_enabled(vcpu)) {
1881 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
1882 atomic_andnot(CPUSTAT_IBS,
1883 &vcpu->arch.sie_block->cpuflags);
1884 }
1885 goto retry;
1886 }
1887
1888 /* nothing to do, just clear the request */
1889 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
1890
1891 return 0;
1892 }
1893
1894 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
1895 {
1896 struct kvm_vcpu *vcpu;
1897 int i;
1898
1899 mutex_lock(&kvm->lock);
1900 preempt_disable();
1901 kvm->arch.epoch = tod - get_tod_clock();
1902 kvm_s390_vcpu_block_all(kvm);
1903 kvm_for_each_vcpu(i, vcpu, kvm)
1904 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
1905 kvm_s390_vcpu_unblock_all(kvm);
1906 preempt_enable();
1907 mutex_unlock(&kvm->lock);
1908 }
1909
1910 /**
1911 * kvm_arch_fault_in_page - fault-in guest page if necessary
1912 * @vcpu: The corresponding virtual cpu
1913 * @gpa: Guest physical address
1914 * @writable: Whether the page should be writable or not
1915 *
1916 * Make sure that a guest page has been faulted-in on the host.
1917 *
1918 * Return: Zero on success, negative error code otherwise.
1919 */
1920 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
1921 {
1922 return gmap_fault(vcpu->arch.gmap, gpa,
1923 writable ? FAULT_FLAG_WRITE : 0);
1924 }
1925
1926 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
1927 unsigned long token)
1928 {
1929 struct kvm_s390_interrupt inti;
1930 struct kvm_s390_irq irq;
1931
1932 if (start_token) {
1933 irq.u.ext.ext_params2 = token;
1934 irq.type = KVM_S390_INT_PFAULT_INIT;
1935 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
1936 } else {
1937 inti.type = KVM_S390_INT_PFAULT_DONE;
1938 inti.parm64 = token;
1939 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
1940 }
1941 }
1942
1943 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
1944 struct kvm_async_pf *work)
1945 {
1946 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
1947 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
1948 }
1949
1950 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
1951 struct kvm_async_pf *work)
1952 {
1953 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
1954 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
1955 }
1956
1957 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
1958 struct kvm_async_pf *work)
1959 {
1960 /* s390 will always inject the page directly */
1961 }
1962
1963 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
1964 {
1965 /*
1966 * s390 will always inject the page directly,
1967 * but we still want check_async_completion to cleanup
1968 */
1969 return true;
1970 }
1971
1972 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
1973 {
1974 hva_t hva;
1975 struct kvm_arch_async_pf arch;
1976 int rc;
1977
1978 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1979 return 0;
1980 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
1981 vcpu->arch.pfault_compare)
1982 return 0;
1983 if (psw_extint_disabled(vcpu))
1984 return 0;
1985 if (kvm_s390_vcpu_has_irq(vcpu, 0))
1986 return 0;
1987 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
1988 return 0;
1989 if (!vcpu->arch.gmap->pfault_enabled)
1990 return 0;
1991
1992 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
1993 hva += current->thread.gmap_addr & ~PAGE_MASK;
1994 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
1995 return 0;
1996
1997 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
1998 return rc;
1999 }
2000
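/*
 * Work done on every entry into SIE: complete async pfaults, handle
 * pending machine checks, deliver pending interrupts and process any
 * outstanding vcpu requests before the guest is entered.
 */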
2001 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2002 {
2003 int rc, cpuflags;
2004
2005 /*
2006 * On s390, notifications for arriving pages are delivered directly
2007 * to the guest, but the housekeeping for completed pfaults is
2008 * handled outside the worker.
2009 */
2010 kvm_check_async_pf_completion(vcpu);
2011
2012 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
2013
2014 if (need_resched())
2015 schedule();
2016
2017 if (test_cpu_flag(CIF_MCCK_PENDING))
2018 s390_handle_mcck();
2019
2020 if (!kvm_is_ucontrol(vcpu->kvm)) {
2021 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2022 if (rc)
2023 return rc;
2024 }
2025
2026 rc = kvm_s390_handle_requests(vcpu);
2027 if (rc)
2028 return rc;
2029
2030 if (guestdbg_enabled(vcpu)) {
2031 kvm_s390_backup_guest_per_regs(vcpu);
2032 kvm_s390_patch_guest_per_regs(vcpu);
2033 }
2034
2035 vcpu->arch.sie_block->icptcode = 0;
2036 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2037 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2038 trace_kvm_s390_sie_enter(vcpu, cpuflags);
2039
2040 return 0;
2041 }
2042
2043 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2044 {
2045 psw_t *psw = &vcpu->arch.sie_block->gpsw;
2046 u8 opcode;
2047 int rc;
2048
2049 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2050 trace_kvm_s390_sie_fault(vcpu);
2051
2052 /*
2053 * We want to inject an addressing exception, which is defined as a
2054 * suppressing or terminating exception. However, since we came here
2055 * by a DAT access exception, the PSW still points to the faulting
2056 * instruction since DAT exceptions are nullifying. So we've got
2057 * to look up the current opcode to get the length of the instruction
2058 * to be able to forward the PSW.
2059 */
2060 rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
2061 if (rc)
2062 return kvm_s390_inject_prog_cond(vcpu, rc);
2063 psw->addr = __rewind_psw(*psw, -insn_length(opcode));
2064
2065 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
2066 }
2067
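/*
 * Handle the result of a SIE exit: a negative exit_reason indicates a
 * host-side fault (a ucontrol translation exception or a guest pfault),
 * everything else is forwarded to the intercept handlers.
 */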
2068 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2069 {
2070 int rc = -1;
2071
2072 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2073 vcpu->arch.sie_block->icptcode);
2074 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2075
2076 if (guestdbg_enabled(vcpu))
2077 kvm_s390_restore_guest_per_regs(vcpu);
2078
2079 if (exit_reason >= 0) {
2080 rc = 0;
2081 } else if (kvm_is_ucontrol(vcpu->kvm)) {
2082 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2083 vcpu->run->s390_ucontrol.trans_exc_code =
2084 current->thread.gmap_addr;
2085 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2086 rc = -EREMOTE;
2087
2088 } else if (current->thread.gmap_pfault) {
2089 trace_kvm_s390_major_guest_pfault(vcpu);
2090 current->thread.gmap_pfault = 0;
2091 if (kvm_arch_setup_async_pf(vcpu)) {
2092 rc = 0;
2093 } else {
2094 gpa_t gpa = current->thread.gmap_addr;
2095 rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
2096 }
2097 }
2098
2099 if (rc == -1)
2100 rc = vcpu_post_run_fault_in_sie(vcpu);
2101
2102 memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
2103
2104 if (rc == 0) {
2105 if (kvm_is_ucontrol(vcpu->kvm))
2106 /* Don't exit for host interrupts. */
2107 rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
2108 else
2109 rc = kvm_handle_sie_intercept(vcpu);
2110 }
2111
2112 return rc;
2113 }
2114
2115 static int __vcpu_run(struct kvm_vcpu *vcpu)
2116 {
2117 int rc, exit_reason;
2118
2119 /*
2120 * We try to hold kvm->srcu during most of vcpu_run (except when
2121 * running the guest), so that memslots (and other stuff) are protected
2122 */
2123 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2124
2125 do {
2126 rc = vcpu_pre_run(vcpu);
2127 if (rc)
2128 break;
2129
2130 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2131 /*
2132 * As PF_VCPU will be used in the fault handler, there must be
2133 * no uaccess between guest_enter and guest_exit.
2134 */
2135 local_irq_disable();
2136 __kvm_guest_enter();
2137 local_irq_enable();
2138 exit_reason = sie64a(vcpu->arch.sie_block,
2139 vcpu->run->s.regs.gprs);
2140 local_irq_disable();
2141 __kvm_guest_exit();
2142 local_irq_enable();
2143 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2144
2145 rc = vcpu_post_run(vcpu, exit_reason);
2146 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2147
2148 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2149 return rc;
2150 }
2151
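/*
 * Copy register state that userspace marked dirty in kvm_run back into
 * the SIE control block before entering the guest; store_regs() below
 * does the reverse after the run loop ends.
 */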
2152 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2153 {
2154 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2155 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2156 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2157 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2158 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2159 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2160 /* some control register changes require a tlb flush */
2161 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2162 }
2163 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2164 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2165 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2166 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2167 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2168 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2169 }
2170 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2171 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2172 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2173 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2174 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2175 kvm_clear_async_pf_completion_queue(vcpu);
2176 }
2177 kvm_run->kvm_dirty_regs = 0;
2178 }
2179
2180 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2181 {
2182 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2183 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2184 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2185 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2186 kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2187 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2188 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2189 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2190 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2191 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2192 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2193 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2194 }
2195
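/*
 * Entry point for the KVM_RUN ioctl: sync registers from kvm_run, loop
 * in __vcpu_run() until userspace intervention is required, then store
 * the registers back and report the exit reason.
 */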
2196 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2197 {
2198 int rc;
2199 sigset_t sigsaved;
2200
2201 if (guestdbg_exit_pending(vcpu)) {
2202 kvm_s390_prepare_debug_exit(vcpu);
2203 return 0;
2204 }
2205
2206 if (vcpu->sigset_active)
2207 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2208
2209 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2210 kvm_s390_vcpu_start(vcpu);
2211 } else if (is_vcpu_stopped(vcpu)) {
2212 pr_err_ratelimited("can't run stopped vcpu %d\n",
2213 vcpu->vcpu_id);
2214 return -EINVAL;
2215 }
2216
2217 sync_regs(vcpu, kvm_run);
2218
2219 might_fault();
2220 rc = __vcpu_run(vcpu);
2221
2222 if (signal_pending(current) && !rc) {
2223 kvm_run->exit_reason = KVM_EXIT_INTR;
2224 rc = -EINTR;
2225 }
2226
2227 if (guestdbg_exit_pending(vcpu) && !rc) {
2228 kvm_s390_prepare_debug_exit(vcpu);
2229 rc = 0;
2230 }
2231
2232 if (rc == -EOPNOTSUPP) {
2233 /* intercept cannot be handled in-kernel, prepare kvm_run */
2234 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
2235 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2236 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2237 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2238 rc = 0;
2239 }
2240
2241 if (rc == -EREMOTE) {
2242 /* intercept was handled, but userspace support is needed;
2243 * kvm_run has been prepared by the handler */
2244 rc = 0;
2245 }
2246
2247 store_regs(vcpu, kvm_run);
2248
2249 if (vcpu->sigset_active)
2250 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2251
2252 vcpu->stat.exit_userspace++;
2253 return rc;
2254 }
2255
2256 /*
2257 * store status at address
2258 * we have two special cases:
2259 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2260 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2261 */
2262 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2263 {
2264 unsigned char archmode = 1;
2265 unsigned int px;
2266 u64 clkcomp;
2267 int rc;
2268
2269 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2270 if (write_guest_abs(vcpu, 163, &archmode, 1))
2271 return -EFAULT;
2272 gpa = SAVE_AREA_BASE;
2273 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2274 if (write_guest_real(vcpu, 163, &archmode, 1))
2275 return -EFAULT;
2276 gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
2277 }
2278 rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
2279 vcpu->arch.guest_fpregs.fprs, 128);
2280 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
2281 vcpu->run->s.regs.gprs, 128);
2282 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
2283 &vcpu->arch.sie_block->gpsw, 16);
2284 px = kvm_s390_get_prefix(vcpu);
2285 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
2286 &px, 4);
2287 rc |= write_guest_abs(vcpu,
2288 gpa + offsetof(struct save_area, fp_ctrl_reg),
2289 &vcpu->arch.guest_fpregs.fpc, 4);
2290 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
2291 &vcpu->arch.sie_block->todpr, 4);
2292 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
2293 &vcpu->arch.sie_block->cputm, 8);
2294 clkcomp = vcpu->arch.sie_block->ckc >> 8;
2295 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
2296 &clkcomp, 8);
2297 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
2298 &vcpu->run->s.regs.acrs, 64);
2299 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
2300 &vcpu->arch.sie_block->gcr, 128);
2301 return rc ? -EFAULT : 0;
2302 }
2303
2304 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2305 {
2306 /*
2307 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2308 * copying in vcpu load/put. Let's update our copies before we save
2309 * them into the save area.
2310 */
2311 save_fpu_regs();
2312 if (test_kvm_facility(vcpu->kvm, 129)) {
2313 /*
2314 * If the vector extension is available, the vector registers
2315 * which overlap with the floating-point registers are saved in
2316 * the SIE-control block. Hence, extract the floating-point
2317 * registers and the FPC value and store them in the
2318 * guest_fpregs structure.
2319 */
2320 vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
2321 convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
2322 current->thread.fpu.vxrs);
2323 } else
2324 save_fpu_to(&vcpu->arch.guest_fpregs);
2325 save_access_regs(vcpu->run->s.regs.acrs);
2326
2327 return kvm_s390_store_status_unloaded(vcpu, addr);
2328 }
2329
2330 /*
2331 * store additional status at address
2332 */
2333 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2334 unsigned long gpa)
2335 {
2336 /* Only bits 0-53 are used for address formation */
2337 if (!(gpa & ~0x3ff))
2338 return 0;
2339
2340 return write_guest_abs(vcpu, gpa & ~0x3ff,
2341 (void *)&vcpu->run->s.regs.vrs, 512);
2342 }
2343
2344 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2345 {
2346 if (!test_kvm_facility(vcpu->kvm, 129))
2347 return 0;
2348
2349 /*
2350 * The guest VXRS are in the host VXRS due to the lazy
2351 * copying in vcpu load/put. We can simply call save_fpu_regs()
2352 * to save the current register state because we are in the
2353 * middle of a load/put cycle.
2354 *
2355 * Let's update our copies before we save it into the save area.
2356 */
2357 save_fpu_regs();
2358
2359 return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2360 }
2361
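/*
 * IBS is only worthwhile while a single VCPU is running. The helpers
 * below merely queue ENABLE/DISABLE requests; the requests are acted
 * upon in kvm_s390_handle_requests() on the target VCPU.
 */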
2362 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2363 {
2364 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2365 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2366 }
2367
2368 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2369 {
2370 unsigned int i;
2371 struct kvm_vcpu *vcpu;
2372
2373 kvm_for_each_vcpu(i, vcpu, kvm) {
2374 __disable_ibs_on_vcpu(vcpu);
2375 }
2376 }
2377
2378 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2379 {
2380 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2381 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2382 }
2383
2384 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2385 {
2386 int i, online_vcpus, started_vcpus = 0;
2387
2388 if (!is_vcpu_stopped(vcpu))
2389 return;
2390
2391 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2392 /* Only one cpu at a time may enter/leave the STOPPED state. */
2393 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2394 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2395
2396 for (i = 0; i < online_vcpus; i++) {
2397 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2398 started_vcpus++;
2399 }
2400
2401 if (started_vcpus == 0) {
2402 /* we're the only active VCPU -> speed it up */
2403 __enable_ibs_on_vcpu(vcpu);
2404 } else if (started_vcpus == 1) {
2405 /*
2406 * As we are starting a second VCPU, we have to disable
2407 * the IBS facility on all VCPUs to remove potentially
2408 * outstanding ENABLE requests.
2409 */
2410 __disable_ibs_on_all_vcpus(vcpu->kvm);
2411 }
2412
2413 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2414 /*
2415 * Another VCPU might have used IBS while we were offline.
2416 * Let's play safe and flush the VCPU at startup.
2417 */
2418 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2419 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2420 return;
2421 }
2422
2423 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2424 {
2425 int i, online_vcpus, started_vcpus = 0;
2426 struct kvm_vcpu *started_vcpu = NULL;
2427
2428 if (is_vcpu_stopped(vcpu))
2429 return;
2430
2431 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2432 /* Only one cpu at a time may enter/leave the STOPPED state. */
2433 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2434 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2435
2436 /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2437 kvm_s390_clear_stop_irq(vcpu);
2438
2439 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2440 __disable_ibs_on_vcpu(vcpu);
2441
2442 for (i = 0; i < online_vcpus; i++) {
2443 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2444 started_vcpus++;
2445 started_vcpu = vcpu->kvm->vcpus[i];
2446 }
2447 }
2448
2449 if (started_vcpus == 1) {
2450 /*
2451 * As we only have one VCPU left, we want to enable the
2452 * IBS facility for that VCPU to speed it up.
2453 */
2454 __enable_ibs_on_vcpu(started_vcpu);
2455 }
2456
2457 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2458 return;
2459 }
2460
2461 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2462 struct kvm_enable_cap *cap)
2463 {
2464 int r;
2465
2466 if (cap->flags)
2467 return -EINVAL;
2468
2469 switch (cap->cap) {
2470 case KVM_CAP_S390_CSS_SUPPORT:
2471 if (!vcpu->kvm->arch.css_support) {
2472 vcpu->kvm->arch.css_support = 1;
2473 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2474 trace_kvm_s390_enable_css(vcpu->kvm);
2475 }
2476 r = 0;
2477 break;
2478 default:
2479 r = -EINVAL;
2480 break;
2481 }
2482 return r;
2483 }
2484
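/*
 * Handler for the KVM_S390_MEM_OP vcpu ioctl. A minimal sketch of how
 * userspace might drive it (guest_addr, len, buf and vcpu_fd are
 * illustrative assumptions, not defined in this file):
 *
 *	struct kvm_s390_mem_op ksmo = {
 *		.gaddr = guest_addr,
 *		.size  = len,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buf,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &ksmo);
 *
 * With KVM_S390_MEMOP_F_CHECK_ONLY set in .flags only the access check
 * is performed and no data is copied.
 */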
2485 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2486 struct kvm_s390_mem_op *mop)
2487 {
2488 void __user *uaddr = (void __user *)mop->buf;
2489 void *tmpbuf = NULL;
2490 int r, srcu_idx;
2491 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2492 | KVM_S390_MEMOP_F_CHECK_ONLY;
2493
2494 if (mop->flags & ~supported_flags)
2495 return -EINVAL;
2496
2497 if (mop->size > MEM_OP_MAX_SIZE)
2498 return -E2BIG;
2499
2500 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2501 tmpbuf = vmalloc(mop->size);
2502 if (!tmpbuf)
2503 return -ENOMEM;
2504 }
2505
2506 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2507
2508 switch (mop->op) {
2509 case KVM_S390_MEMOP_LOGICAL_READ:
2510 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2511 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
2512 break;
2513 }
2514 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2515 if (r == 0) {
2516 if (copy_to_user(uaddr, tmpbuf, mop->size))
2517 r = -EFAULT;
2518 }
2519 break;
2520 case KVM_S390_MEMOP_LOGICAL_WRITE:
2521 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2522 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
2523 break;
2524 }
2525 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2526 r = -EFAULT;
2527 break;
2528 }
2529 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2530 break;
2531 default:
2532 r = -EINVAL;
2533 }
2534
2535 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2536
2537 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2538 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2539
2540 vfree(tmpbuf);
2541 return r;
2542 }
2543
2544 long kvm_arch_vcpu_ioctl(struct file *filp,
2545 unsigned int ioctl, unsigned long arg)
2546 {
2547 struct kvm_vcpu *vcpu = filp->private_data;
2548 void __user *argp = (void __user *)arg;
2549 int idx;
2550 long r;
2551
2552 switch (ioctl) {
2553 case KVM_S390_IRQ: {
2554 struct kvm_s390_irq s390irq;
2555
2556 r = -EFAULT;
2557 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2558 break;
2559 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2560 break;
2561 }
2562 case KVM_S390_INTERRUPT: {
2563 struct kvm_s390_interrupt s390int;
2564 struct kvm_s390_irq s390irq;
2565
2566 r = -EFAULT;
2567 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2568 break;
2569 if (s390int_to_s390irq(&s390int, &s390irq))
2570 return -EINVAL;
2571 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2572 break;
2573 }
2574 case KVM_S390_STORE_STATUS:
2575 idx = srcu_read_lock(&vcpu->kvm->srcu);
2576 r = kvm_s390_vcpu_store_status(vcpu, arg);
2577 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2578 break;
2579 case KVM_S390_SET_INITIAL_PSW: {
2580 psw_t psw;
2581
2582 r = -EFAULT;
2583 if (copy_from_user(&psw, argp, sizeof(psw)))
2584 break;
2585 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2586 break;
2587 }
2588 case KVM_S390_INITIAL_RESET:
2589 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2590 break;
2591 case KVM_SET_ONE_REG:
2592 case KVM_GET_ONE_REG: {
2593 struct kvm_one_reg reg;
2594 r = -EFAULT;
2595 if (copy_from_user(&reg, argp, sizeof(reg)))
2596 break;
2597 if (ioctl == KVM_SET_ONE_REG)
2598 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2599 else
2600 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2601 break;
2602 }
2603 #ifdef CONFIG_KVM_S390_UCONTROL
2604 case KVM_S390_UCAS_MAP: {
2605 struct kvm_s390_ucas_mapping ucasmap;
2606
2607 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2608 r = -EFAULT;
2609 break;
2610 }
2611
2612 if (!kvm_is_ucontrol(vcpu->kvm)) {
2613 r = -EINVAL;
2614 break;
2615 }
2616
2617 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2618 ucasmap.vcpu_addr, ucasmap.length);
2619 break;
2620 }
2621 case KVM_S390_UCAS_UNMAP: {
2622 struct kvm_s390_ucas_mapping ucasmap;
2623
2624 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2625 r = -EFAULT;
2626 break;
2627 }
2628
2629 if (!kvm_is_ucontrol(vcpu->kvm)) {
2630 r = -EINVAL;
2631 break;
2632 }
2633
2634 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2635 ucasmap.length);
2636 break;
2637 }
2638 #endif
2639 case KVM_S390_VCPU_FAULT: {
2640 r = gmap_fault(vcpu->arch.gmap, arg, 0);
2641 break;
2642 }
2643 case KVM_ENABLE_CAP:
2644 {
2645 struct kvm_enable_cap cap;
2646 r = -EFAULT;
2647 if (copy_from_user(&cap, argp, sizeof(cap)))
2648 break;
2649 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2650 break;
2651 }
2652 case KVM_S390_MEM_OP: {
2653 struct kvm_s390_mem_op mem_op;
2654
2655 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2656 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2657 else
2658 r = -EFAULT;
2659 break;
2660 }
2661 case KVM_S390_SET_IRQ_STATE: {
2662 struct kvm_s390_irq_state irq_state;
2663
2664 r = -EFAULT;
2665 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2666 break;
2667 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2668 irq_state.len == 0 ||
2669 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2670 r = -EINVAL;
2671 break;
2672 }
2673 r = kvm_s390_set_irq_state(vcpu,
2674 (void __user *) irq_state.buf,
2675 irq_state.len);
2676 break;
2677 }
2678 case KVM_S390_GET_IRQ_STATE: {
2679 struct kvm_s390_irq_state irq_state;
2680
2681 r = -EFAULT;
2682 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2683 break;
2684 if (irq_state.len == 0) {
2685 r = -EINVAL;
2686 break;
2687 }
2688 r = kvm_s390_get_irq_state(vcpu,
2689 (__u8 __user *) irq_state.buf,
2690 irq_state.len);
2691 break;
2692 }
2693 default:
2694 r = -ENOTTY;
2695 }
2696 return r;
2697 }
2698
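/*
 * mmap() fault handler for the vcpu fd: ucontrol guests may map the
 * SIE control block at KVM_S390_SIE_PAGE_OFFSET; everything else
 * results in SIGBUS.
 */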
2699 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2700 {
2701 #ifdef CONFIG_KVM_S390_UCONTROL
2702 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2703 && (kvm_is_ucontrol(vcpu->kvm))) {
2704 vmf->page = virt_to_page(vcpu->arch.sie_block);
2705 get_page(vmf->page);
2706 return 0;
2707 }
2708 #endif
2709 return VM_FAULT_SIGBUS;
2710 }
2711
2712 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2713 unsigned long npages)
2714 {
2715 return 0;
2716 }
2717
2718 /* Section: memory related */
2719 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2720 struct kvm_memory_slot *memslot,
2721 const struct kvm_userspace_memory_region *mem,
2722 enum kvm_mr_change change)
2723 {
2724 /* A few sanity checks. Memory slots have to start and end at a
2725 segment boundary (1MB). The memory in userland may be fragmented
2726 into various different vmas. It is okay to mmap() and munmap()
2727 memory in this slot after doing this call, at any time. */
2728
2729 if (mem->userspace_addr & 0xffffful)
2730 return -EINVAL;
2731
2732 if (mem->memory_size & 0xffffful)
2733 return -EINVAL;
2734
2735 return 0;
2736 }
2737
2738 void kvm_arch_commit_memory_region(struct kvm *kvm,
2739 const struct kvm_userspace_memory_region *mem,
2740 const struct kvm_memory_slot *old,
2741 const struct kvm_memory_slot *new,
2742 enum kvm_mr_change change)
2743 {
2744 int rc;
2745
2746 /* If the basics of the memslot do not change, we do not want
2747 * to update the gmap. Every update causes several unnecessary
2748 * segment translation exceptions. This is usually handled just
2749 * fine by the normal fault handler + gmap, but it will also
2750 * cause faults on the prefix page of running guest CPUs.
2751 */
2752 if (old->userspace_addr == mem->userspace_addr &&
2753 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2754 old->npages * PAGE_SIZE == mem->memory_size)
2755 return;
2756
2757 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2758 mem->guest_phys_addr, mem->memory_size);
2759 if (rc)
2760 pr_warn("failed to commit memory region\n");
2761 return;
2762 }
2763
2764 static int __init kvm_s390_init(void)
2765 {
2766 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2767 }
2768
2769 static void __exit kvm_s390_exit(void)
2770 {
2771 kvm_exit();
2772 }
2773
2774 module_init(kvm_s390_init);
2775 module_exit(kvm_s390_exit);
2776
2777 /*
2778 * Enable autoloading of the kvm module.
2779 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2780 * since x86 takes a different approach.
2781 */
2782 #include <linux/miscdevice.h>
2783 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2784 MODULE_ALIAS("devname:kvm");