arch/s390/kvm/kvm-s390.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * hosting IBM Z kernel virtual machines (s390x)
   4  *
   5  * Copyright IBM Corp. 2008, 2020
   6  *
   7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
   8  *               Christian Borntraeger <borntraeger@de.ibm.com>
   9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  11  *               Jason J. Herne <jjherne@us.ibm.com>
  12  */
  13
  14 #define KMSG_COMPONENT "kvm-s390"
  15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/mman.h>
  25 #include <linux/module.h>
  26 #include <linux/moduleparam.h>
  27 #include <linux/random.h>
  28 #include <linux/slab.h>
  29 #include <linux/timer.h>
  30 #include <linux/vmalloc.h>
  31 #include <linux/bitmap.h>
  32 #include <linux/sched/signal.h>
  33 #include <linux/string.h>
  34 #include <linux/pgtable.h>
  35
  36 #include <asm/asm-offsets.h>
  37 #include <asm/lowcore.h>
  38 #include <asm/stp.h>
  39 #include <asm/gmap.h>
  40 #include <asm/nmi.h>
  41 #include <asm/switch_to.h>
  42 #include <asm/isc.h>
  43 #include <asm/sclp.h>
  44 #include <asm/cpacf.h>
  45 #include <asm/timex.h>
  46 #include <asm/ap.h>
  47 #include <asm/uv.h>
  48 #include <asm/fpu/api.h>
  49 #include "kvm-s390.h"
  50 #include "gaccess.h"
  51
  52 #define CREATE_TRACE_POINTS
  53 #include "trace.h"
  54 #include "trace-s390.h"
  55
  56 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  57 #define LOCAL_IRQS 32
  58 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  59                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  60
  61 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
  62         KVM_GENERIC_VM_STATS(),
  63         STATS_DESC_COUNTER(VM, inject_io),
  64         STATS_DESC_COUNTER(VM, inject_float_mchk),
  65         STATS_DESC_COUNTER(VM, inject_pfault_done),
  66         STATS_DESC_COUNTER(VM, inject_service_signal),
  67         STATS_DESC_COUNTER(VM, inject_virtio)
  68 };
  69
  70 const struct kvm_stats_header kvm_vm_stats_header = {
  71         .name_size = KVM_STATS_NAME_SIZE,
  72         .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
  73         .id_offset = sizeof(struct kvm_stats_header),
  74         .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
  75         .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
  76                        sizeof(kvm_vm_stats_desc),
  77 };
  78
  79 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
  80         KVM_GENERIC_VCPU_STATS(),
  81         STATS_DESC_COUNTER(VCPU, exit_userspace),
  82         STATS_DESC_COUNTER(VCPU, exit_null),
  83         STATS_DESC_COUNTER(VCPU, exit_external_request),
  84         STATS_DESC_COUNTER(VCPU, exit_io_request),
  85         STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
  86         STATS_DESC_COUNTER(VCPU, exit_stop_request),
  87         STATS_DESC_COUNTER(VCPU, exit_validity),
  88         STATS_DESC_COUNTER(VCPU, exit_instruction),
  89         STATS_DESC_COUNTER(VCPU, exit_pei),
  90         STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
  91         STATS_DESC_COUNTER(VCPU, instruction_lctl),
  92         STATS_DESC_COUNTER(VCPU, instruction_lctlg),
  93         STATS_DESC_COUNTER(VCPU, instruction_stctl),
  94         STATS_DESC_COUNTER(VCPU, instruction_stctg),
  95         STATS_DESC_COUNTER(VCPU, exit_program_interruption),
  96         STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
  97         STATS_DESC_COUNTER(VCPU, exit_operation_exception),
  98         STATS_DESC_COUNTER(VCPU, deliver_ckc),
  99         STATS_DESC_COUNTER(VCPU, deliver_cputm),
 100         STATS_DESC_COUNTER(VCPU, deliver_external_call),
 101         STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
 102         STATS_DESC_COUNTER(VCPU, deliver_service_signal),
 103         STATS_DESC_COUNTER(VCPU, deliver_virtio),
 104         STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
 105         STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
 106         STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
 107         STATS_DESC_COUNTER(VCPU, deliver_program),
 108         STATS_DESC_COUNTER(VCPU, deliver_io),
 109         STATS_DESC_COUNTER(VCPU, deliver_machine_check),
 110         STATS_DESC_COUNTER(VCPU, exit_wait_state),
 111         STATS_DESC_COUNTER(VCPU, inject_ckc),
 112         STATS_DESC_COUNTER(VCPU, inject_cputm),
 113         STATS_DESC_COUNTER(VCPU, inject_external_call),
 114         STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
 115         STATS_DESC_COUNTER(VCPU, inject_mchk),
 116         STATS_DESC_COUNTER(VCPU, inject_pfault_init),
 117         STATS_DESC_COUNTER(VCPU, inject_program),
 118         STATS_DESC_COUNTER(VCPU, inject_restart),
 119         STATS_DESC_COUNTER(VCPU, inject_set_prefix),
 120         STATS_DESC_COUNTER(VCPU, inject_stop_signal),
 121         STATS_DESC_COUNTER(VCPU, instruction_epsw),
 122         STATS_DESC_COUNTER(VCPU, instruction_gs),
 123         STATS_DESC_COUNTER(VCPU, instruction_io_other),
 124         STATS_DESC_COUNTER(VCPU, instruction_lpsw),
 125         STATS_DESC_COUNTER(VCPU, instruction_lpswe),
 126         STATS_DESC_COUNTER(VCPU, instruction_pfmf),
 127         STATS_DESC_COUNTER(VCPU, instruction_ptff),
 128         STATS_DESC_COUNTER(VCPU, instruction_sck),
 129         STATS_DESC_COUNTER(VCPU, instruction_sckpf),
 130         STATS_DESC_COUNTER(VCPU, instruction_stidp),
 131         STATS_DESC_COUNTER(VCPU, instruction_spx),
 132         STATS_DESC_COUNTER(VCPU, instruction_stpx),
 133         STATS_DESC_COUNTER(VCPU, instruction_stap),
 134         STATS_DESC_COUNTER(VCPU, instruction_iske),
 135         STATS_DESC_COUNTER(VCPU, instruction_ri),
 136         STATS_DESC_COUNTER(VCPU, instruction_rrbe),
 137         STATS_DESC_COUNTER(VCPU, instruction_sske),
 138         STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
 139         STATS_DESC_COUNTER(VCPU, instruction_stsi),
 140         STATS_DESC_COUNTER(VCPU, instruction_stfl),
 141         STATS_DESC_COUNTER(VCPU, instruction_tb),
 142         STATS_DESC_COUNTER(VCPU, instruction_tpi),
 143         STATS_DESC_COUNTER(VCPU, instruction_tprot),
 144         STATS_DESC_COUNTER(VCPU, instruction_tsch),
 145         STATS_DESC_COUNTER(VCPU, instruction_sie),
 146         STATS_DESC_COUNTER(VCPU, instruction_essa),
 147         STATS_DESC_COUNTER(VCPU, instruction_sthyi),
 148         STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
 149         STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
 150         STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
 151         STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
 152         STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
 153         STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
 154         STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
 155         STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
 156         STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
 157         STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
 158         STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
 159         STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
 160         STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
 161         STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
 162         STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
 163         STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
 164         STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
 165         STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
 166         STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
 167         STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
 168         STATS_DESC_COUNTER(VCPU, diag_9c_forward),
 169         STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
 170         STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
 171         STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
 172         STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
 173         STATS_DESC_COUNTER(VCPU, pfault_sync)
 174 };
 175
 176 const struct kvm_stats_header kvm_vcpu_stats_header = {
 177         .name_size = KVM_STATS_NAME_SIZE,
 178         .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
 179         .id_offset = sizeof(struct kvm_stats_header),
 180         .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
 181         .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
 182                        sizeof(kvm_vcpu_stats_desc),
 183 };
 184
 185 /* allow nested virtualization in KVM (if enabled by user space) */
 186 static int nested;
 187 module_param(nested, int, S_IRUGO);
 188 MODULE_PARM_DESC(nested, "Nested virtualization support");
 189
 190 /* allow 1m huge page guest backing, if !nested */
 191 static int hpage;
 192 module_param(hpage, int, 0444);
 193 MODULE_PARM_DESC(hpage, "1m huge page backing support");
 194
 195 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
 196 static u8 halt_poll_max_steal = 10;
 197 module_param(halt_poll_max_steal, byte, 0644);
 198 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
 199
 200 /* if set to true, the GISA will be initialized and used if available */
 201 static bool use_gisa  = true;
 202 module_param(use_gisa, bool, 0644);
 203 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
 204
 205 /* maximum diag9c forwarding per second */
 206 unsigned int diag9c_forwarding_hz;
 207 module_param(diag9c_forwarding_hz, uint, 0644);
 208 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
 209
 210 /*
 211  * For now we handle at most 16 double words as this is what the s390 base
 212  * kernel handles and stores in the prefix page. If we ever need to go beyond
 213  * this, this requires changes to code, but the external uapi can stay.
 214  */
 215 #define SIZE_INTERNAL 16
 216
 217 /*
 218  * Base feature mask that defines default mask for facilities. Consists of the
 219  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 220  */
 221 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
 222 /*
 223  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 224  * and defines the facilities that can be enabled via a cpu model.
 225  */
 226 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
 227
 228 static unsigned long kvm_s390_fac_size(void)
 229 {
 230         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
 231         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
 232         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
 233                 sizeof(stfle_fac_list));
 234
 235         return SIZE_INTERNAL;
 236 }
 237
 238 /* available cpu features supported by kvm */
 239 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 240 /* available subfunctions indicated via query / "test bit" */
 241 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 242
 243 static struct gmap_notifier gmap_notifier;
 244 static struct gmap_notifier vsie_gmap_notifier;
 245 debug_info_t *kvm_s390_dbf;
 246 debug_info_t *kvm_s390_dbf_uv;
 247
 248 /* Section: not file related */
 249 int kvm_arch_hardware_enable(void)
 250 {
 251         /* every s390 is virtualization enabled ;-) */
 252         return 0;
 253 }
 254
 255 int kvm_arch_check_processor_compat(void *opaque)
 256 {
 257         return 0;
 258 }
 259
 260 /* forward declarations */
 261 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 262                               unsigned long end);
 263 static int sca_switch_to_extended(struct kvm *kvm);
 264
 265 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
 266 {
 267         u8 delta_idx = 0;
 268
 269         /*
 270          * The TOD jumps by delta, we have to compensate this by adding
 271          * -delta to the epoch.
 272          */
 273         delta = -delta;
 274
 275         /* sign-extension - we're adding to signed values below */
 276         if ((s64)delta < 0)
 277                 delta_idx = -1;
 278
 279         scb->epoch += delta;
 280         if (scb->ecd & ECD_MEF) {
 281                 scb->epdx += delta_idx;
 282                 if (scb->epoch < delta)
 283                         scb->epdx += 1;
 284         }
 285 }
 286
 287 /*
 288  * This callback is executed during stop_machine(). All CPUs are therefore
 289  * temporarily stopped. In order not to change guest behavior, we have to
 290  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 291  * so a CPU won't be stopped while calculating with the epoch.
 292  */
 293 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 294                           void *v)
 295 {
 296         struct kvm *kvm;
 297         struct kvm_vcpu *vcpu;
 298         int i;
 299         unsigned long long *delta = v;
 300
 301         list_for_each_entry(kvm, &vm_list, vm_list) {
 302                 kvm_for_each_vcpu(i, vcpu, kvm) {
 303                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
 304                         if (i == 0) {
 305                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
 306                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
 307                         }
 308                         if (vcpu->arch.cputm_enabled)
 309                                 vcpu->arch.cputm_start += *delta;
 310                         if (vcpu->arch.vsie_block)
 311                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
 312                                                    *delta);
 313                 }
 314         }
 315         return NOTIFY_OK;
 316 }
 317
 318 static struct notifier_block kvm_clock_notifier = {
 319         .notifier_call = kvm_clock_sync,
 320 };
 321
 322 int kvm_arch_hardware_setup(void *opaque)
 323 {
 324         gmap_notifier.notifier_call = kvm_gmap_notifier;
 325         gmap_register_pte_notifier(&gmap_notifier);
 326         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 327         gmap_register_pte_notifier(&vsie_gmap_notifier);
 328         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 329                                        &kvm_clock_notifier);
 330         return 0;
 331 }
 332
 333 void kvm_arch_hardware_unsetup(void)
 334 {
 335         gmap_unregister_pte_notifier(&gmap_notifier);
 336         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 337         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 338                                          &kvm_clock_notifier);
 339 }
 340
 341 static void allow_cpu_feat(unsigned long nr)
 342 {
 343         set_bit_inv(nr, kvm_s390_available_cpu_feat);
 344 }
 345
 346 static inline int plo_test_bit(unsigned char nr)
 347 {
 348         unsigned long function = (unsigned long)nr | 0x100;
 349         int cc;
 350
 351         asm volatile(
 352                 "       lgr     0,%[function]\n"
 353                 /* Parameter registers are ignored for "test bit" */
 354                 "       plo     0,0,0,0(0)\n"
 355                 "       ipm     %0\n"
 356                 "       srl     %0,28\n"
 357                 : "=d" (cc)
 358                 : [function] "d" (function)
 359                 : "cc", "0");
 360         return cc == 0;
 361 }
 362
 363 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
 364 {
 365         asm volatile(
 366                 "       lghi    0,0\n"
 367                 "       lgr     1,%[query]\n"
 368                 /* Parameter registers are ignored */
 369                 "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
 370                 :
 371                 : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
 372                 : "cc", "memory", "0", "1");
 373 }
 374
 375 #define INSN_SORTL 0xb938
 376 #define INSN_DFLTCC 0xb939
 377
 378 static void kvm_s390_cpu_feat_init(void)
 379 {
 380         int i;
 381
 382         for (i = 0; i < 256; ++i) {
 383                 if (plo_test_bit(i))
 384                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 385         }
 386
 387         if (test_facility(28)) /* TOD-clock steering */
 388                 ptff(kvm_s390_available_subfunc.ptff,
 389                      sizeof(kvm_s390_available_subfunc.ptff),
 390                      PTFF_QAF);
 391
 392         if (test_facility(17)) { /* MSA */
 393                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 394                               kvm_s390_available_subfunc.kmac);
 395                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 396                               kvm_s390_available_subfunc.kmc);
 397                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 398                               kvm_s390_available_subfunc.km);
 399                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 400                               kvm_s390_available_subfunc.kimd);
 401                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 402                               kvm_s390_available_subfunc.klmd);
 403         }
 404         if (test_facility(76)) /* MSA3 */
 405                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 406                               kvm_s390_available_subfunc.pckmo);
 407         if (test_facility(77)) { /* MSA4 */
 408                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 409                               kvm_s390_available_subfunc.kmctr);
 410                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 411                               kvm_s390_available_subfunc.kmf);
 412                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 413                               kvm_s390_available_subfunc.kmo);
 414                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 415                               kvm_s390_available_subfunc.pcc);
 416         }
 417         if (test_facility(57)) /* MSA5 */
 418                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
 419                               kvm_s390_available_subfunc.ppno);
 420
 421         if (test_facility(146)) /* MSA8 */
 422                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
 423                               kvm_s390_available_subfunc.kma);
 424
 425         if (test_facility(155)) /* MSA9 */
 426                 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
 427                               kvm_s390_available_subfunc.kdsa);
 428
 429         if (test_facility(150)) /* SORTL */
 430                 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
 431
 432         if (test_facility(151)) /* DFLTCC */
 433                 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
 434
 435         if (MACHINE_HAS_ESOP)
 436                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 437         /*
 438          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 439          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 440          */
 441         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 442             !test_facility(3) || !nested)
 443                 return;
 444         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 445         if (sclp.has_64bscao)
 446                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 447         if (sclp.has_siif)
 448                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 449         if (sclp.has_gpere)
 450                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 451         if (sclp.has_gsls)
 452                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 453         if (sclp.has_ib)
 454                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 455         if (sclp.has_cei)
 456                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 457         if (sclp.has_ibs)
 458                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 459         if (sclp.has_kss)
 460                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
 461         /*
 462          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 463          * all skey handling functions read/set the skey from the PGSTE
 464          * instead of the real storage key.
 465          *
 466          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 467          * pages being detected as preserved although they are resident.
 468          *
 469          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 470          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 471          *
 472          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 473          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 474          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 475          *
 476          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 477          * cannot easily shadow the SCA because of the ipte lock.
 478          */
 479 }
 480
 481 int kvm_arch_init(void *opaque)
 482 {
 483         int rc = -ENOMEM;
 484
 485         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 486         if (!kvm_s390_dbf)
 487                 return -ENOMEM;
 488
 489         kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
 490         if (!kvm_s390_dbf_uv)
 491                 goto out;
 492
 493         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
 494             debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
 495                 goto out;
 496
 497         kvm_s390_cpu_feat_init();
 498
 499         /* Register floating interrupt controller interface. */
 500         rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 501         if (rc) {
 502                 pr_err("A FLIC registration call failed with rc=%d\n", rc);
 503                 goto out;
 504         }
 505
 506         rc = kvm_s390_gib_init(GAL_ISC);
 507         if (rc)
 508                 goto out;
 509
 510         return 0;
 511
 512 out:
 513         kvm_arch_exit();
 514         return rc;
 515 }
 516
 517 void kvm_arch_exit(void)
 518 {
 519         kvm_s390_gib_destroy();
 520         debug_unregister(kvm_s390_dbf);
 521         debug_unregister(kvm_s390_dbf_uv);
 522 }
 523
 524 /* Section: device related */
 525 long kvm_arch_dev_ioctl(struct file *filp,
 526                         unsigned int ioctl, unsigned long arg)
 527 {
 528         if (ioctl == KVM_S390_ENABLE_SIE)
 529                 return s390_enable_sie();
 530         return -EINVAL;
 531 }
 532
 533 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 534 {
 535         int r;
 536
 537         switch (ext) {
 538         case KVM_CAP_S390_PSW:
 539         case KVM_CAP_S390_GMAP:
 540         case KVM_CAP_SYNC_MMU:
 541 #ifdef CONFIG_KVM_S390_UCONTROL
 542         case KVM_CAP_S390_UCONTROL:
 543 #endif
 544         case KVM_CAP_ASYNC_PF:
 545         case KVM_CAP_SYNC_REGS:
 546         case KVM_CAP_ONE_REG:
 547         case KVM_CAP_ENABLE_CAP:
 548         case KVM_CAP_S390_CSS_SUPPORT:
 549         case KVM_CAP_IOEVENTFD:
 550         case KVM_CAP_DEVICE_CTRL:
 551         case KVM_CAP_S390_IRQCHIP:
 552         case KVM_CAP_VM_ATTRIBUTES:
 553         case KVM_CAP_MP_STATE:
 554         case KVM_CAP_IMMEDIATE_EXIT:
 555         case KVM_CAP_S390_INJECT_IRQ:
 556         case KVM_CAP_S390_USER_SIGP:
 557         case KVM_CAP_S390_USER_STSI:
 558         case KVM_CAP_S390_SKEYS:
 559         case KVM_CAP_S390_IRQ_STATE:
 560         case KVM_CAP_S390_USER_INSTR0:
 561         case KVM_CAP_S390_CMMA_MIGRATION:
 562         case KVM_CAP_S390_AIS:
 563         case KVM_CAP_S390_AIS_MIGRATION:
 564         case KVM_CAP_S390_VCPU_RESETS:
 565         case KVM_CAP_SET_GUEST_DEBUG:
 566         case KVM_CAP_S390_DIAG318:
 567         case KVM_CAP_S390_MEM_OP_EXTENSION:
 568                 r = 1;
 569                 break;
 570         case KVM_CAP_SET_GUEST_DEBUG2:
 571                 r = KVM_GUESTDBG_VALID_MASK;
 572                 break;
 573         case KVM_CAP_S390_HPAGE_1M:
 574                 r = 0;
 575                 if (hpage && !kvm_is_ucontrol(kvm))
 576                         r = 1;
 577                 break;
 578         case KVM_CAP_S390_MEM_OP:
 579                 r = MEM_OP_MAX_SIZE;
 580                 break;
 581         case KVM_CAP_NR_VCPUS:
 582         case KVM_CAP_MAX_VCPUS:
 583         case KVM_CAP_MAX_VCPU_ID:
 584                 r = KVM_S390_BSCA_CPU_SLOTS;
 585                 if (!kvm_s390_use_sca_entries())
 586                         r = KVM_MAX_VCPUS;
 587                 else if (sclp.has_esca && sclp.has_64bscao)
 588                         r = KVM_S390_ESCA_CPU_SLOTS;
 589                 break;
 590         case KVM_CAP_S390_COW:
 591                 r = MACHINE_HAS_ESOP;
 592                 break;
 593         case KVM_CAP_S390_VECTOR_REGISTERS:
 594                 r = MACHINE_HAS_VX;
 595                 break;
 596         case KVM_CAP_S390_RI:
 597                 r = test_facility(64);
 598                 break;
 599         case KVM_CAP_S390_GS:
 600                 r = test_facility(133);
 601                 break;
 602         case KVM_CAP_S390_BPB:
 603                 r = test_facility(82);
 604                 break;
 605         case KVM_CAP_S390_PROTECTED:
 606                 r = is_prot_virt_host();
 607                 break;
 608         default:
 609                 r = 0;
 610         }
 611         return r;
 612 }
 613
 614 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 615 {
 616         int i;
 617         gfn_t cur_gfn, last_gfn;
 618         unsigned long gaddr, vmaddr;
 619         struct gmap *gmap = kvm->arch.gmap;
 620         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
 621
 622         /* Loop over all guest segments */
 623         cur_gfn = memslot->base_gfn;
 624         last_gfn = memslot->base_gfn + memslot->npages;
 625         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
 626                 gaddr = gfn_to_gpa(cur_gfn);
 627                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
 628                 if (kvm_is_error_hva(vmaddr))
 629                         continue;
 630
 631                 bitmap_zero(bitmap, _PAGE_ENTRIES);
 632                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
 633                 for (i = 0; i < _PAGE_ENTRIES; i++) {
 634                         if (test_bit(i, bitmap))
 635                                 mark_page_dirty(kvm, cur_gfn + i);
 636                 }
 637
 638                 if (fatal_signal_pending(current))
 639                         return;
 640                 cond_resched();
 641         }
 642 }
 643
 644 /* Section: vm related */
 645 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 646
 647 /*
 648  * Get (and clear) the dirty memory log for a memory slot.
 649  */
 650 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 651                                struct kvm_dirty_log *log)
 652 {
 653         int r;
 654         unsigned long n;
 655         struct kvm_memory_slot *memslot;
 656         int is_dirty;
 657
 658         if (kvm_is_ucontrol(kvm))
 659                 return -EINVAL;
 660
 661         mutex_lock(&kvm->slots_lock);
 662
 663         r = -EINVAL;
 664         if (log->slot >= KVM_USER_MEM_SLOTS)
 665                 goto out;
 666
 667         r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
 668         if (r)
 669                 goto out;
 670
 671         /* Clear the dirty log */
 672         if (is_dirty) {
 673                 n = kvm_dirty_bitmap_bytes(memslot);
 674                 memset(memslot->dirty_bitmap, 0, n);
 675         }
 676         r = 0;
 677 out:
 678         mutex_unlock(&kvm->slots_lock);
 679         return r;
 680 }
 681
 682 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 683 {
 684         unsigned int i;
 685         struct kvm_vcpu *vcpu;
 686
 687         kvm_for_each_vcpu(i, vcpu, kvm) {
 688                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 689         }
 690 }
 691
 692 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 693 {
 694         int r;
 695
 696         if (cap->flags)
 697                 return -EINVAL;
 698
 699         switch (cap->cap) {
 700         case KVM_CAP_S390_IRQCHIP:
 701                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 702                 kvm->arch.use_irqchip = 1;
 703                 r = 0;
 704                 break;
 705         case KVM_CAP_S390_USER_SIGP:
 706                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 707                 kvm->arch.user_sigp = 1;
 708                 r = 0;
 709                 break;
 710         case KVM_CAP_S390_VECTOR_REGISTERS:
 711                 mutex_lock(&kvm->lock);
 712                 if (kvm->created_vcpus) {
 713                         r = -EBUSY;
 714                 } else if (MACHINE_HAS_VX) {
 715                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 716                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 717                         if (test_facility(134)) {
 718                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
 719                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
 720                         }
 721                         if (test_facility(135)) {
 722                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
 723                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
 724                         }
 725                         if (test_facility(148)) {
 726                                 set_kvm_facility(kvm->arch.model.fac_mask, 148);
 727                                 set_kvm_facility(kvm->arch.model.fac_list, 148);
 728                         }
 729                         if (test_facility(152)) {
 730                                 set_kvm_facility(kvm->arch.model.fac_mask, 152);
 731                                 set_kvm_facility(kvm->arch.model.fac_list, 152);
 732                         }
 733                         if (test_facility(192)) {
 734                                 set_kvm_facility(kvm->arch.model.fac_mask, 192);
 735                                 set_kvm_facility(kvm->arch.model.fac_list, 192);
 736                         }
 737                         r = 0;
 738                 } else
 739                         r = -EINVAL;
 740                 mutex_unlock(&kvm->lock);
 741                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 742                          r ? "(not available)" : "(success)");
 743                 break;
 744         case KVM_CAP_S390_RI:
 745                 r = -EINVAL;
 746                 mutex_lock(&kvm->lock);
 747                 if (kvm->created_vcpus) {
 748                         r = -EBUSY;
 749                 } else if (test_facility(64)) {
 750                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 751                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 752                         r = 0;
 753                 }
 754                 mutex_unlock(&kvm->lock);
 755                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 756                          r ? "(not available)" : "(success)");
 757                 break;
 758         case KVM_CAP_S390_AIS:
 759                 mutex_lock(&kvm->lock);
 760                 if (kvm->created_vcpus) {
 761                         r = -EBUSY;
 762                 } else {
 763                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
 764                         set_kvm_facility(kvm->arch.model.fac_list, 72);
 765                         r = 0;
 766                 }
 767                 mutex_unlock(&kvm->lock);
 768                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
 769                          r ? "(not available)" : "(success)");
 770                 break;
 771         case KVM_CAP_S390_GS:
 772                 r = -EINVAL;
 773                 mutex_lock(&kvm->lock);
 774                 if (kvm->created_vcpus) {
 775                         r = -EBUSY;
 776                 } else if (test_facility(133)) {
 777                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
 778                         set_kvm_facility(kvm->arch.model.fac_list, 133);
 779                         r = 0;
 780                 }
 781                 mutex_unlock(&kvm->lock);
 782                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 783                          r ? "(not available)" : "(success)");
 784                 break;
 785         case KVM_CAP_S390_HPAGE_1M:
 786                 mutex_lock(&kvm->lock);
 787                 if (kvm->created_vcpus)
 788                         r = -EBUSY;
 789                 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
 790                         r = -EINVAL;
 791                 else {
 792                         r = 0;
 793                         mmap_write_lock(kvm->mm);
 794                         kvm->mm->context.allow_gmap_hpage_1m = 1;
 795                         mmap_write_unlock(kvm->mm);
 796                         /*
 797                          * We might have to create fake 4k page
 798                          * tables. To avoid that the hardware works on
 799                          * stale PGSTEs, we emulate these instructions.
 800                          */
 801                         kvm->arch.use_skf = 0;
 802                         kvm->arch.use_pfmfi = 0;
 803                 }
 804                 mutex_unlock(&kvm->lock);
 805                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
 806                          r ? "(not available)" : "(success)");
 807                 break;
 808         case KVM_CAP_S390_USER_STSI:
 809                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 810                 kvm->arch.user_stsi = 1;
 811                 r = 0;
 812                 break;
 813         case KVM_CAP_S390_USER_INSTR0:
 814                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 815                 kvm->arch.user_instr0 = 1;
 816                 icpt_operexc_on_all_vcpus(kvm);
 817                 r = 0;
 818                 break;
 819         default:
 820                 r = -EINVAL;
 821                 break;
 822         }
 823         return r;
 824 }
 825
 826 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 827 {
 828         int ret;
 829
 830         switch (attr->attr) {
 831         case KVM_S390_VM_MEM_LIMIT_SIZE:
 832                 ret = 0;
 833                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 834                          kvm->arch.mem_limit);
 835                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 836                         ret = -EFAULT;
 837                 break;
 838         default:
 839                 ret = -ENXIO;
 840                 break;
 841         }
 842         return ret;
 843 }
 844
 845 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 846 {
 847         int ret;
 848         unsigned int idx;
 849         switch (attr->attr) {
 850         case KVM_S390_VM_MEM_ENABLE_CMMA:
 851                 ret = -ENXIO;
 852                 if (!sclp.has_cmma)
 853                         break;
 854
 855                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 856                 mutex_lock(&kvm->lock);
 857                 if (kvm->created_vcpus)
 858                         ret = -EBUSY;
 859                 else if (kvm->mm->context.allow_gmap_hpage_1m)
 860                         ret = -EINVAL;
 861                 else {
 862                         kvm->arch.use_cmma = 1;
 863                         /* Not compatible with cmma. */
 864                         kvm->arch.use_pfmfi = 0;
 865                         ret = 0;
 866                 }
 867                 mutex_unlock(&kvm->lock);
 868                 break;
 869         case KVM_S390_VM_MEM_CLR_CMMA:
 870                 ret = -ENXIO;
 871                 if (!sclp.has_cmma)
 872                         break;
 873                 ret = -EINVAL;
 874                 if (!kvm->arch.use_cmma)
 875                         break;
 876
 877                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 878                 mutex_lock(&kvm->lock);
 879                 idx = srcu_read_lock(&kvm->srcu);
 880                 s390_reset_cmma(kvm->arch.gmap->mm);
 881                 srcu_read_unlock(&kvm->srcu, idx);
 882                 mutex_unlock(&kvm->lock);
 883                 ret = 0;
 884                 break;
 885         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 886                 unsigned long new_limit;
 887
 888                 if (kvm_is_ucontrol(kvm))
 889                         return -EINVAL;
 890
 891                 if (get_user(new_limit, (u64 __user *)attr->addr))
 892                         return -EFAULT;
 893
 894                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 895                     new_limit > kvm->arch.mem_limit)
 896                         return -E2BIG;
 897
 898                 if (!new_limit)
 899                         return -EINVAL;
 900
 901                 /* gmap_create takes last usable address */
 902                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 903                         new_limit -= 1;
 904
 905                 ret = -EBUSY;
 906                 mutex_lock(&kvm->lock);
 907                 if (!kvm->created_vcpus) {
 908                         /* gmap_create will round the limit up */
 909                         struct gmap *new = gmap_create(current->mm, new_limit);
 910
 911                         if (!new) {
 912                                 ret = -ENOMEM;
 913                         } else {
 914                                 gmap_remove(kvm->arch.gmap);
 915                                 new->private = kvm;
 916                                 kvm->arch.gmap = new;
 917                                 ret = 0;
 918                         }
 919                 }
 920                 mutex_unlock(&kvm->lock);
 921                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 922                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 923                          (void *) kvm->arch.gmap->asce);
 924                 break;
 925         }
 926         default:
 927                 ret = -ENXIO;
 928                 break;
 929         }
 930         return ret;
 931 }
 932
 933 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 934
 935 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
 936 {
 937         struct kvm_vcpu *vcpu;
 938         int i;
 939
 940         kvm_s390_vcpu_block_all(kvm);
 941
 942         kvm_for_each_vcpu(i, vcpu, kvm) {
 943                 kvm_s390_vcpu_crypto_setup(vcpu);
 944                 /* recreate the shadow crycb by leaving the VSIE handler */
 945                 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
 946         }
 947
 948         kvm_s390_vcpu_unblock_all(kvm);
 949 }
 950
 951 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 952 {
 953         mutex_lock(&kvm->lock);
 954         switch (attr->attr) {
 955         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 956                 if (!test_kvm_facility(kvm, 76)) {
 957                         mutex_unlock(&kvm->lock);
 958                         return -EINVAL;
 959                 }
 960                 get_random_bytes(
 961                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 962                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 963                 kvm->arch.crypto.aes_kw = 1;
 964                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 965                 break;
 966         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 967                 if (!test_kvm_facility(kvm, 76)) {
 968                         mutex_unlock(&kvm->lock);
 969                         return -EINVAL;
 970                 }
 971                 get_random_bytes(
 972                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 973                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 974                 kvm->arch.crypto.dea_kw = 1;
 975                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 976                 break;
 977         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 978                 if (!test_kvm_facility(kvm, 76)) {
 979                         mutex_unlock(&kvm->lock);
 980                         return -EINVAL;
 981                 }
 982                 kvm->arch.crypto.aes_kw = 0;
 983                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 984                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 985                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 986                 break;
 987         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 988                 if (!test_kvm_facility(kvm, 76)) {
 989                         mutex_unlock(&kvm->lock);
 990                         return -EINVAL;
 991                 }
 992                 kvm->arch.crypto.dea_kw = 0;
 993                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 994                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 995                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 996                 break;
 997         case KVM_S390_VM_CRYPTO_ENABLE_APIE:
 998                 if (!ap_instructions_available()) {
 999                         mutex_unlock(&kvm->lock);
1000                         return -EOPNOTSUPP;
1001                 }
1002                 kvm->arch.crypto.apie = 1;
1003                 break;
1004         case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1005                 if (!ap_instructions_available()) {
1006                         mutex_unlock(&kvm->lock);
1007                         return -EOPNOTSUPP;
1008                 }
1009                 kvm->arch.crypto.apie = 0;
1010                 break;
1011         default:
1012                 mutex_unlock(&kvm->lock);
1013                 return -ENXIO;
1014         }
1015
1016         kvm_s390_vcpu_crypto_reset_all(kvm);
1017         mutex_unlock(&kvm->lock);
1018         return 0;
1019 }
1020
1021 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1022 {
1023         int cx;
1024         struct kvm_vcpu *vcpu;
1025
1026         kvm_for_each_vcpu(cx, vcpu, kvm)
1027                 kvm_s390_sync_request(req, vcpu);
1028 }
1029
1030 /*
1031  * Must be called with kvm->srcu held to avoid races on memslots, and with
1032  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1033  */
1034 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1035 {
1036         struct kvm_memory_slot *ms;
1037         struct kvm_memslots *slots;
1038         unsigned long ram_pages = 0;
1039         int slotnr;
1040
1041         /* migration mode already enabled */
1042         if (kvm->arch.migration_mode)
1043                 return 0;
1044         slots = kvm_memslots(kvm);
1045         if (!slots || !slots->used_slots)
1046                 return -EINVAL;
1047
1048         if (!kvm->arch.use_cmma) {
1049                 kvm->arch.migration_mode = 1;
1050                 return 0;
1051         }
1052         /* mark all the pages in active slots as dirty */
1053         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1054                 ms = slots->memslots + slotnr;
1055                 if (!ms->dirty_bitmap)
1056                         return -EINVAL;
1057                 /*
1058                  * The second half of the bitmap is only used on x86,
1059                  * and would be wasted otherwise, so we put it to good
1060                  * use here to keep track of the state of the storage
1061                  * attributes.
1062                  */
1063                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1064                 ram_pages += ms->npages;
1065         }
1066         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1067         kvm->arch.migration_mode = 1;
1068         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1069         return 0;
1070 }
1071
1072 /*
1073  * Must be called with kvm->slots_lock to avoid races with ourselves and
1074  * kvm_s390_vm_start_migration.
1075  */
1076 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1077 {
1078         /* migration mode already disabled */
1079         if (!kvm->arch.migration_mode)
1080                 return 0;
1081         kvm->arch.migration_mode = 0;
1082         if (kvm->arch.use_cmma)
1083                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1084         return 0;
1085 }
1086
1087 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1088                                      struct kvm_device_attr *attr)
1089 {
1090         int res = -ENXIO;
1091
1092         mutex_lock(&kvm->slots_lock);
1093         switch (attr->attr) {
1094         case KVM_S390_VM_MIGRATION_START:
1095                 res = kvm_s390_vm_start_migration(kvm);
1096                 break;
1097         case KVM_S390_VM_MIGRATION_STOP:
1098                 res = kvm_s390_vm_stop_migration(kvm);
1099                 break;
1100         default:
1101                 break;
1102         }
1103         mutex_unlock(&kvm->slots_lock);
1104
1105         return res;
1106 }
1107
1108 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1109                                      struct kvm_device_attr *attr)
1110 {
1111         u64 mig = kvm->arch.migration_mode;
1112
1113         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1114                 return -ENXIO;
1115
1116         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1117                 return -EFAULT;
1118         return 0;
1119 }
1120
1121 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1122 {
1123         struct kvm_s390_vm_tod_clock gtod;
1124
1125         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1126                 return -EFAULT;
1127
1128         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1129                 return -EINVAL;
1130         kvm_s390_set_tod_clock(kvm, &gtod);
1131
1132         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1133                 gtod.epoch_idx, gtod.tod);
1134
1135         return 0;
1136 }
1137
1138 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1139 {
1140         u8 gtod_high;
1141
1142         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1143                                            sizeof(gtod_high)))
1144                 return -EFAULT;
1145
1146         if (gtod_high != 0)
1147                 return -EINVAL;
1148         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1149
1150         return 0;
1151 }
1152
1153 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1154 {
1155         struct kvm_s390_vm_tod_clock gtod = { 0 };
1156
1157         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1158                            sizeof(gtod.tod)))
1159                 return -EFAULT;
1160
1161         kvm_s390_set_tod_clock(kvm, &gtod);
1162         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1163         return 0;
1164 }
1165
1166 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1167 {
1168         int ret;
1169
1170         if (attr->flags)
1171                 return -EINVAL;
1172
1173         switch (attr->attr) {
1174         case KVM_S390_VM_TOD_EXT:
1175                 ret = kvm_s390_set_tod_ext(kvm, attr);
1176                 break;
1177         case KVM_S390_VM_TOD_HIGH:
1178                 ret = kvm_s390_set_tod_high(kvm, attr);
1179                 break;
1180         case KVM_S390_VM_TOD_LOW:
1181                 ret = kvm_s390_set_tod_low(kvm, attr);
1182                 break;
1183         default:
1184                 ret = -ENXIO;
1185                 break;
1186         }
1187         return ret;
1188 }
1189
1190 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1191                                    struct kvm_s390_vm_tod_clock *gtod)
1192 {
1193         union tod_clock clk;
1194
1195         preempt_disable();
1196
1197         store_tod_clock_ext(&clk);
1198
1199         gtod->tod = clk.tod + kvm->arch.epoch;
1200         gtod->epoch_idx = 0;
1201         if (test_kvm_facility(kvm, 139)) {
1202                 gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1203                 if (gtod->tod < clk.tod)
1204                         gtod->epoch_idx += 1;
1205         }
1206
1207         preempt_enable();
1208 }
1209
1210 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1211 {
1212         struct kvm_s390_vm_tod_clock gtod;
1213
1214         memset(&gtod, 0, sizeof(gtod));
1215         kvm_s390_get_tod_clock(kvm, &gtod);
1216         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1217                 return -EFAULT;
1218
1219         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1220                 gtod.epoch_idx, gtod.tod);
1221         return 0;
1222 }
1223
1224 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1225 {
1226         u8 gtod_high = 0;
1227
1228         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1229                                          sizeof(gtod_high)))
1230                 return -EFAULT;
1231         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1232
1233         return 0;
1234 }
1235
1236 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1237 {
1238         u64 gtod;
1239
1240         gtod = kvm_s390_get_tod_clock_fast(kvm);
1241         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1242                 return -EFAULT;
1243         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1244
1245         return 0;
1246 }
1247
1248 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1249 {
1250         int ret;
1251
1252         if (attr->flags)
1253                 return -EINVAL;
1254
1255         switch (attr->attr) {
1256         case KVM_S390_VM_TOD_EXT:
1257                 ret = kvm_s390_get_tod_ext(kvm, attr);
1258                 break;
1259         case KVM_S390_VM_TOD_HIGH:
1260                 ret = kvm_s390_get_tod_high(kvm, attr);
1261                 break;
1262         case KVM_S390_VM_TOD_LOW:
1263                 ret = kvm_s390_get_tod_low(kvm, attr);
1264                 break;
1265         default:
1266                 ret = -ENXIO;
1267                 break;
1268         }
1269         return ret;
1270 }
1271
1272 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1273 {
1274         struct kvm_s390_vm_cpu_processor *proc;
1275         u16 lowest_ibc, unblocked_ibc;
1276         int ret = 0;
1277
1278         mutex_lock(&kvm->lock);
1279         if (kvm->created_vcpus) {
1280                 ret = -EBUSY;
1281                 goto out;
1282         }
1283         proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1284         if (!proc) {
1285                 ret = -ENOMEM;
1286                 goto out;
1287         }
1288         if (!copy_from_user(proc, (void __user *)attr->addr,
1289                             sizeof(*proc))) {
1290                 kvm->arch.model.cpuid = proc->cpuid;
1291                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1292                 unblocked_ibc = sclp.ibc & 0xfff;
1293                 if (lowest_ibc && proc->ibc) {
1294                         if (proc->ibc > unblocked_ibc)
1295                                 kvm->arch.model.ibc = unblocked_ibc;
1296                         else if (proc->ibc < lowest_ibc)
1297                                 kvm->arch.model.ibc = lowest_ibc;
1298                         else
1299                                 kvm->arch.model.ibc = proc->ibc;
1300                 }
1301                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1302                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1303                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1304                          kvm->arch.model.ibc,
1305                          kvm->arch.model.cpuid);
1306                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1307                          kvm->arch.model.fac_list[0],
1308                          kvm->arch.model.fac_list[1],
1309                          kvm->arch.model.fac_list[2]);
1310         } else
1311                 ret = -EFAULT;
1312         kfree(proc);
1313 out:
1314         mutex_unlock(&kvm->lock);
1315         return ret;
1316 }
1317
1318 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1319                                        struct kvm_device_attr *attr)
1320 {
1321         struct kvm_s390_vm_cpu_feat data;
1322
1323         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1324                 return -EFAULT;
1325         if (!bitmap_subset((unsigned long *) data.feat,
1326                            kvm_s390_available_cpu_feat,
1327                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1328                 return -EINVAL;
1329
1330         mutex_lock(&kvm->lock);
1331         if (kvm->created_vcpus) {
1332                 mutex_unlock(&kvm->lock);
1333                 return -EBUSY;
1334         }
1335         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1336                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1337         mutex_unlock(&kvm->lock);
1338         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1339                          data.feat[0],
1340                          data.feat[1],
1341                          data.feat[2]);
1342         return 0;
1343 }
1344
1345 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1346                                           struct kvm_device_attr *attr)
1347 {
1348         mutex_lock(&kvm->lock);
1349         if (kvm->created_vcpus) {
1350                 mutex_unlock(&kvm->lock);
1351                 return -EBUSY;
1352         }
1353
1354         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1355                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1356                 mutex_unlock(&kvm->lock);
1357                 return -EFAULT;
1358         }
1359         mutex_unlock(&kvm->lock);
1360
1361         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1362                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1363                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1364                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1365                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1366         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1367                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1368                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1369         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1370                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1371                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1372         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1373                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1374                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1375         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1376                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1377                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1378         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1379                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1380                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1381         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1382                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1383                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1384         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1385                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1386                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1387         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1388                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1389                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1390         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1391                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1392                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1393         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1394                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1395                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1396         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1397                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1398                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1399         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1400                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1401                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1402         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1403                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1404                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1405         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1406                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1407                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1408         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1409                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1410                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1411                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1412                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1413         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1414                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1415                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1416                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1417                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1418
1419         return 0;
1420 }
1421
1422 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1423 {
1424         int ret = -ENXIO;
1425
1426         switch (attr->attr) {
1427         case KVM_S390_VM_CPU_PROCESSOR:
1428                 ret = kvm_s390_set_processor(kvm, attr);
1429                 break;
1430         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1431                 ret = kvm_s390_set_processor_feat(kvm, attr);
1432                 break;
1433         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1434                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1435                 break;
1436         }
1437         return ret;
1438 }
1439
1440 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1441 {
1442         struct kvm_s390_vm_cpu_processor *proc;
1443         int ret = 0;
1444
1445         proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1446         if (!proc) {
1447                 ret = -ENOMEM;
1448                 goto out;
1449         }
1450         proc->cpuid = kvm->arch.model.cpuid;
1451         proc->ibc = kvm->arch.model.ibc;
1452         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1453                S390_ARCH_FAC_LIST_SIZE_BYTE);
1454         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1455                  kvm->arch.model.ibc,
1456                  kvm->arch.model.cpuid);
1457         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1458                  kvm->arch.model.fac_list[0],
1459                  kvm->arch.model.fac_list[1],
1460                  kvm->arch.model.fac_list[2]);
1461         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1462                 ret = -EFAULT;
1463         kfree(proc);
1464 out:
1465         return ret;
1466 }
1467
1468 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1469 {
1470         struct kvm_s390_vm_cpu_machine *mach;
1471         int ret = 0;
1472
1473         mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1474         if (!mach) {
1475                 ret = -ENOMEM;
1476                 goto out;
1477         }
1478         get_cpu_id((struct cpuid *) &mach->cpuid);
1479         mach->ibc = sclp.ibc;
1480         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1481                S390_ARCH_FAC_LIST_SIZE_BYTE);
1482         memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1483                sizeof(stfle_fac_list));
1484         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1485                  kvm->arch.model.ibc,
1486                  kvm->arch.model.cpuid);
1487         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1488                  mach->fac_mask[0],
1489                  mach->fac_mask[1],
1490                  mach->fac_mask[2]);
1491         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1492                  mach->fac_list[0],
1493                  mach->fac_list[1],
1494                  mach->fac_list[2]);
1495         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1496                 ret = -EFAULT;
1497         kfree(mach);
1498 out:
1499         return ret;
1500 }
1501
1502 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1503                                        struct kvm_device_attr *attr)
1504 {
1505         struct kvm_s390_vm_cpu_feat data;
1506
1507         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1508                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1509         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1510                 return -EFAULT;
1511         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1512                          data.feat[0],
1513                          data.feat[1],
1514                          data.feat[2]);
1515         return 0;
1516 }
1517
1518 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1519                                      struct kvm_device_attr *attr)
1520 {
1521         struct kvm_s390_vm_cpu_feat data;
1522
1523         bitmap_copy((unsigned long *) data.feat,
1524                     kvm_s390_available_cpu_feat,
1525                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1526         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1527                 return -EFAULT;
1528         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1529                          data.feat[0],
1530                          data.feat[1],
1531                          data.feat[2]);
1532         return 0;
1533 }
1534
1535 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1536                                           struct kvm_device_attr *attr)
1537 {
1538         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1539             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1540                 return -EFAULT;
1541
1542         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1543                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1544                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1545                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1546                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1547         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1548                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1549                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1550         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1551                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1552                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1553         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1554                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1555                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1556         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1557                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1558                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1559         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1560                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1561                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1562         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1563                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1564                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1565         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1566                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1567                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1568         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1569                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1570                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1571         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1572                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1573                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1574         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1575                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1576                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1577         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1578                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1579                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1580         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1581                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1582                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1583         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1584                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1585                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1586         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1587                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1588                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1589         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1590                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1591                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1592                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1593                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1594         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1595                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1596                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1597                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1598                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1599
1600         return 0;
1601 }
1602
1603 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1604                                         struct kvm_device_attr *attr)
1605 {
1606         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1607             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1608                 return -EFAULT;
1609
1610         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1611                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1612                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1613                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1614                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1615         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1616                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1617                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1618         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1619                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1620                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1621         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1622                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1623                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1624         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1625                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1626                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1627         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1628                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1629                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1630         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1631                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1632                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1633         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1634                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1635                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1636         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1637                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1638                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1639         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1640                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1641                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1642         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1643                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1644                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1645         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1646                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1647                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1648         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1649                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1650                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1651         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1652                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1653                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1654         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1655                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1656                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1657         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1658                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1659                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1660                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1661                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1662         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1663                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1664                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1665                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1666                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1667
1668         return 0;
1669 }
1670
1671 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1672 {
1673         int ret = -ENXIO;
1674
1675         switch (attr->attr) {
1676         case KVM_S390_VM_CPU_PROCESSOR:
1677                 ret = kvm_s390_get_processor(kvm, attr);
1678                 break;
1679         case KVM_S390_VM_CPU_MACHINE:
1680                 ret = kvm_s390_get_machine(kvm, attr);
1681                 break;
1682         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1683                 ret = kvm_s390_get_processor_feat(kvm, attr);
1684                 break;
1685         case KVM_S390_VM_CPU_MACHINE_FEAT:
1686                 ret = kvm_s390_get_machine_feat(kvm, attr);
1687                 break;
1688         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1689                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1690                 break;
1691         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1692                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1693                 break;
1694         }
1695         return ret;
1696 }
1697
1698 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1699 {
1700         int ret;
1701
1702         switch (attr->group) {
1703         case KVM_S390_VM_MEM_CTRL:
1704                 ret = kvm_s390_set_mem_control(kvm, attr);
1705                 break;
1706         case KVM_S390_VM_TOD:
1707                 ret = kvm_s390_set_tod(kvm, attr);
1708                 break;
1709         case KVM_S390_VM_CPU_MODEL:
1710                 ret = kvm_s390_set_cpu_model(kvm, attr);
1711                 break;
1712         case KVM_S390_VM_CRYPTO:
1713                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1714                 break;
1715         case KVM_S390_VM_MIGRATION:
1716                 ret = kvm_s390_vm_set_migration(kvm, attr);
1717                 break;
1718         default:
1719                 ret = -ENXIO;
1720                 break;
1721         }
1722
1723         return ret;
1724 }
1725
1726 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1727 {
1728         int ret;
1729
1730         switch (attr->group) {
1731         case KVM_S390_VM_MEM_CTRL:
1732                 ret = kvm_s390_get_mem_control(kvm, attr);
1733                 break;
1734         case KVM_S390_VM_TOD:
1735                 ret = kvm_s390_get_tod(kvm, attr);
1736                 break;
1737         case KVM_S390_VM_CPU_MODEL:
1738                 ret = kvm_s390_get_cpu_model(kvm, attr);
1739                 break;
1740         case KVM_S390_VM_MIGRATION:
1741                 ret = kvm_s390_vm_get_migration(kvm, attr);
1742                 break;
1743         default:
1744                 ret = -ENXIO;
1745                 break;
1746         }
1747
1748         return ret;
1749 }
1750
1751 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1752 {
1753         int ret;
1754
1755         switch (attr->group) {
1756         case KVM_S390_VM_MEM_CTRL:
1757                 switch (attr->attr) {
1758                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1759                 case KVM_S390_VM_MEM_CLR_CMMA:
1760                         ret = sclp.has_cmma ? 0 : -ENXIO;
1761                         break;
1762                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1763                         ret = 0;
1764                         break;
1765                 default:
1766                         ret = -ENXIO;
1767                         break;
1768                 }
1769                 break;
1770         case KVM_S390_VM_TOD:
1771                 switch (attr->attr) {
1772                 case KVM_S390_VM_TOD_LOW:
1773                 case KVM_S390_VM_TOD_HIGH:
1774                         ret = 0;
1775                         break;
1776                 default:
1777                         ret = -ENXIO;
1778                         break;
1779                 }
1780                 break;
1781         case KVM_S390_VM_CPU_MODEL:
1782                 switch (attr->attr) {
1783                 case KVM_S390_VM_CPU_PROCESSOR:
1784                 case KVM_S390_VM_CPU_MACHINE:
1785                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1786                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1787                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1788                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1789                         ret = 0;
1790                         break;
1791                 default:
1792                         ret = -ENXIO;
1793                         break;
1794                 }
1795                 break;
1796         case KVM_S390_VM_CRYPTO:
1797                 switch (attr->attr) {
1798                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1799                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1800                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1801                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1802                         ret = 0;
1803                         break;
1804                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1805                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1806                         ret = ap_instructions_available() ? 0 : -ENXIO;
1807                         break;
1808                 default:
1809                         ret = -ENXIO;
1810                         break;
1811                 }
1812                 break;
1813         case KVM_S390_VM_MIGRATION:
1814                 ret = 0;
1815                 break;
1816         default:
1817                 ret = -ENXIO;
1818                 break;
1819         }
1820
1821         return ret;
1822 }
1823
1824 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1825 {
1826         uint8_t *keys;
1827         uint64_t hva;
1828         int srcu_idx, i, r = 0;
1829
1830         if (args->flags != 0)
1831                 return -EINVAL;
1832
1833         /* Is this guest using storage keys? */
1834         if (!mm_uses_skeys(current->mm))
1835                 return KVM_S390_GET_SKEYS_NONE;
1836
1837         /* Enforce sane limit on memory allocation */
1838         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1839                 return -EINVAL;
1840
1841         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1842         if (!keys)
1843                 return -ENOMEM;
1844
1845         mmap_read_lock(current->mm);
1846         srcu_idx = srcu_read_lock(&kvm->srcu);
1847         for (i = 0; i < args->count; i++) {
1848                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1849                 if (kvm_is_error_hva(hva)) {
1850                         r = -EFAULT;
1851                         break;
1852                 }
1853
1854                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1855                 if (r)
1856                         break;
1857         }
1858         srcu_read_unlock(&kvm->srcu, srcu_idx);
1859         mmap_read_unlock(current->mm);
1860
1861         if (!r) {
1862                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1863                                  sizeof(uint8_t) * args->count);
1864                 if (r)
1865                         r = -EFAULT;
1866         }
1867
1868         kvfree(keys);
1869         return r;
1870 }
1871
1872 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1873 {
1874         uint8_t *keys;
1875         uint64_t hva;
1876         int srcu_idx, i, r = 0;
1877         bool unlocked;
1878
1879         if (args->flags != 0)
1880                 return -EINVAL;
1881
1882         /* Enforce sane limit on memory allocation */
1883         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1884                 return -EINVAL;
1885
1886         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1887         if (!keys)
1888                 return -ENOMEM;
1889
1890         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1891                            sizeof(uint8_t) * args->count);
1892         if (r) {
1893                 r = -EFAULT;
1894                 goto out;
1895         }
1896
1897         /* Enable storage key handling for the guest */
1898         r = s390_enable_skey();
1899         if (r)
1900                 goto out;
1901
1902         i = 0;
1903         mmap_read_lock(current->mm);
1904         srcu_idx = srcu_read_lock(&kvm->srcu);
1905         while (i < args->count) {
1906                 unlocked = false;
1907                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1908                 if (kvm_is_error_hva(hva)) {
1909                         r = -EFAULT;
1910                         break;
1911                 }
1912
1913                 /* Lowest order bit is reserved */
1914                 if (keys[i] & 0x01) {
1915                         r = -EINVAL;
1916                         break;
1917                 }
1918
1919                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1920                 if (r) {
1921                         r = fixup_user_fault(current->mm, hva,
1922                                              FAULT_FLAG_WRITE, &unlocked);
1923                         if (r)
1924                                 break;
1925                 }
1926                 if (!r)
1927                         i++;
1928         }
1929         srcu_read_unlock(&kvm->srcu, srcu_idx);
1930         mmap_read_unlock(current->mm);
1931 out:
1932         kvfree(keys);
1933         return r;
1934 }
1935
1936 /*
1937  * Base address and length must be sent at the start of each block, therefore
1938  * it's cheaper to send some clean data, as long as it's less than the size of
1939  * two longs.
1940  */
1941 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1942 /* for consistency */
1943 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1944
1945 /*
1946  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1947  * address falls in a hole. In that case the index of one of the memslots
1948  * bordering the hole is returned.
1949  */
1950 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1951 {
1952         int start = 0, end = slots->used_slots;
1953         int slot = atomic_read(&slots->last_used_slot);
1954         struct kvm_memory_slot *memslots = slots->memslots;
1955
1956         if (gfn >= memslots[slot].base_gfn &&
1957             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1958                 return slot;
1959
1960         while (start < end) {
1961                 slot = start + (end - start) / 2;
1962
1963                 if (gfn >= memslots[slot].base_gfn)
1964                         end = slot;
1965                 else
1966                         start = slot + 1;
1967         }
1968
1969         if (start >= slots->used_slots)
1970                 return slots->used_slots - 1;
1971
1972         if (gfn >= memslots[start].base_gfn &&
1973             gfn < memslots[start].base_gfn + memslots[start].npages) {
1974                 atomic_set(&slots->last_used_slot, start);
1975         }
1976
1977         return start;
1978 }
1979
1980 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1981                               u8 *res, unsigned long bufsize)
1982 {
1983         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1984
1985         args->count = 0;
1986         while (args->count < bufsize) {
1987                 hva = gfn_to_hva(kvm, cur_gfn);
1988                 /*
1989                  * We return an error if the first value was invalid, but we
1990                  * return successfully if at least one value was copied.
1991                  */
1992                 if (kvm_is_error_hva(hva))
1993                         return args->count ? 0 : -EFAULT;
1994                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1995                         pgstev = 0;
1996                 res[args->count++] = (pgstev >> 24) & 0x43;
1997                 cur_gfn++;
1998         }
1999
2000         return 0;
2001 }
2002
2003 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2004                                               unsigned long cur_gfn)
2005 {
2006         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
2007         struct kvm_memory_slot *ms = slots->memslots + slotidx;
2008         unsigned long ofs = cur_gfn - ms->base_gfn;
2009
2010         if (ms->base_gfn + ms->npages <= cur_gfn) {
2011                 slotidx--;
2012                 /* If we are above the highest slot, wrap around */
2013                 if (slotidx < 0)
2014                         slotidx = slots->used_slots - 1;
2015
2016                 ms = slots->memslots + slotidx;
2017                 ofs = 0;
2018         }
2019         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2020         while ((slotidx > 0) && (ofs >= ms->npages)) {
2021                 slotidx--;
2022                 ms = slots->memslots + slotidx;
2023                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2024         }
2025         return ms->base_gfn + ofs;
2026 }
2027
2028 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2029                              u8 *res, unsigned long bufsize)
2030 {
2031         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2032         struct kvm_memslots *slots = kvm_memslots(kvm);
2033         struct kvm_memory_slot *ms;
2034
2035         if (unlikely(!slots->used_slots))
2036                 return 0;
2037
2038         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2039         ms = gfn_to_memslot(kvm, cur_gfn);
2040         args->count = 0;
2041         args->start_gfn = cur_gfn;
2042         if (!ms)
2043                 return 0;
2044         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2045         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2046
2047         while (args->count < bufsize) {
2048                 hva = gfn_to_hva(kvm, cur_gfn);
2049                 if (kvm_is_error_hva(hva))
2050                         return 0;
2051                 /* Decrement only if we actually flipped the bit to 0 */
2052                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2053                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2054                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2055                         pgstev = 0;
2056                 /* Save the value */
2057                 res[args->count++] = (pgstev >> 24) & 0x43;
2058                 /* If the next bit is too far away, stop. */
2059                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2060                         return 0;
2061                 /* If we reached the previous "next", find the next one */
2062                 if (cur_gfn == next_gfn)
2063                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2064                 /* Reached the end of memory or of the buffer, stop */
2065                 if ((next_gfn >= mem_end) ||
2066                     (next_gfn - args->start_gfn >= bufsize))
2067                         return 0;
2068                 cur_gfn++;
2069                 /* Reached the end of the current memslot, take the next one. */
2070                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2071                         ms = gfn_to_memslot(kvm, cur_gfn);
2072                         if (!ms)
2073                                 return 0;
2074                 }
2075         }
2076         return 0;
2077 }
2078
2079 /*
2080  * This function searches for the next page with dirty CMMA attributes, and
2081  * saves the attributes in the buffer up to either the end of the buffer or
2082  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2083  * no trailing clean bytes are saved.
2084  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2085  * output buffer will indicate 0 as length.
2086  */
2087 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2088                                   struct kvm_s390_cmma_log *args)
2089 {
2090         unsigned long bufsize;
2091         int srcu_idx, peek, ret;
2092         u8 *values;
2093
2094         if (!kvm->arch.use_cmma)
2095                 return -ENXIO;
2096         /* Invalid/unsupported flags were specified */
2097         if (args->flags & ~KVM_S390_CMMA_PEEK)
2098                 return -EINVAL;
2099         /* Migration mode query, and we are not doing a migration */
2100         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2101         if (!peek && !kvm->arch.migration_mode)
2102                 return -EINVAL;
2103         /* CMMA is disabled or was not used, or the buffer has length zero */
2104         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2105         if (!bufsize || !kvm->mm->context.uses_cmm) {
2106                 memset(args, 0, sizeof(*args));
2107                 return 0;
2108         }
2109         /* We are not peeking, and there are no dirty pages */
2110         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2111                 memset(args, 0, sizeof(*args));
2112                 return 0;
2113         }
2114
2115         values = vmalloc(bufsize);
2116         if (!values)
2117                 return -ENOMEM;
2118
2119         mmap_read_lock(kvm->mm);
2120         srcu_idx = srcu_read_lock(&kvm->srcu);
2121         if (peek)
2122                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2123         else
2124                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2125         srcu_read_unlock(&kvm->srcu, srcu_idx);
2126         mmap_read_unlock(kvm->mm);
2127
2128         if (kvm->arch.migration_mode)
2129                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2130         else
2131                 args->remaining = 0;
2132
2133         if (copy_to_user((void __user *)args->values, values, args->count))
2134                 ret = -EFAULT;
2135
2136         vfree(values);
2137         return ret;
2138 }
2139
2140 /*
2141  * This function sets the CMMA attributes for the given pages. If the input
2142  * buffer has zero length, no action is taken, otherwise the attributes are
2143  * set and the mm->context.uses_cmm flag is set.
2144  */
2145 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2146                                   const struct kvm_s390_cmma_log *args)
2147 {
2148         unsigned long hva, mask, pgstev, i;
2149         uint8_t *bits;
2150         int srcu_idx, r = 0;
2151
2152         mask = args->mask;
2153
2154         if (!kvm->arch.use_cmma)
2155                 return -ENXIO;
2156         /* invalid/unsupported flags */
2157         if (args->flags != 0)
2158                 return -EINVAL;
2159         /* Enforce sane limit on memory allocation */
2160         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2161                 return -EINVAL;
2162         /* Nothing to do */
2163         if (args->count == 0)
2164                 return 0;
2165
2166         bits = vmalloc(array_size(sizeof(*bits), args->count));
2167         if (!bits)
2168                 return -ENOMEM;
2169
2170         r = copy_from_user(bits, (void __user *)args->values, args->count);
2171         if (r) {
2172                 r = -EFAULT;
2173                 goto out;
2174         }
2175
2176         mmap_read_lock(kvm->mm);
2177         srcu_idx = srcu_read_lock(&kvm->srcu);
2178         for (i = 0; i < args->count; i++) {
2179                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2180                 if (kvm_is_error_hva(hva)) {
2181                         r = -EFAULT;
2182                         break;
2183                 }
2184
2185                 pgstev = bits[i];
2186                 pgstev = pgstev << 24;
2187                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2188                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2189         }
2190         srcu_read_unlock(&kvm->srcu, srcu_idx);
2191         mmap_read_unlock(kvm->mm);
2192
2193         if (!kvm->mm->context.uses_cmm) {
2194                 mmap_write_lock(kvm->mm);
2195                 kvm->mm->context.uses_cmm = 1;
2196                 mmap_write_unlock(kvm->mm);
2197         }
2198 out:
2199         vfree(bits);
2200         return r;
2201 }
2202
2203 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2204 {
2205         struct kvm_vcpu *vcpu;
2206         u16 rc, rrc;
2207         int ret = 0;
2208         int i;
2209
2210         /*
2211          * We ignore failures and try to destroy as many CPUs as possible.
2212          * At the same time we must not free the assigned resources when
2213          * this fails, as the ultravisor has still access to that memory.
2214          * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2215          * behind.
2216          * We want to return the first failure rc and rrc, though.
2217          */
2218         kvm_for_each_vcpu(i, vcpu, kvm) {
2219                 mutex_lock(&vcpu->mutex);
2220                 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2221                         *rcp = rc;
2222                         *rrcp = rrc;
2223                         ret = -EIO;
2224                 }
2225                 mutex_unlock(&vcpu->mutex);
2226         }
2227         /* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2228         if (use_gisa)
2229                 kvm_s390_gisa_enable(kvm);
2230         return ret;
2231 }
2232
2233 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2234 {
2235         int i, r = 0;
2236         u16 dummy;
2237
2238         struct kvm_vcpu *vcpu;
2239
2240         /* Disable the GISA if the ultravisor does not support AIV. */
2241         if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
2242                 kvm_s390_gisa_disable(kvm);
2243
2244         kvm_for_each_vcpu(i, vcpu, kvm) {
2245                 mutex_lock(&vcpu->mutex);
2246                 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2247                 mutex_unlock(&vcpu->mutex);
2248                 if (r)
2249                         break;
2250         }
2251         if (r)
2252                 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2253         return r;
2254 }
2255
2256 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2257 {
2258         int r = 0;
2259         u16 dummy;
2260         void __user *argp = (void __user *)cmd->data;
2261
2262         switch (cmd->cmd) {
2263         case KVM_PV_ENABLE: {
2264                 r = -EINVAL;
2265                 if (kvm_s390_pv_is_protected(kvm))
2266                         break;
2267
2268                 /*
2269                  *  FMT 4 SIE needs esca. As we never switch back to bsca from
2270                  *  esca, we need no cleanup in the error cases below
2271                  */
2272                 r = sca_switch_to_extended(kvm);
2273                 if (r)
2274                         break;
2275
2276                 mmap_write_lock(current->mm);
2277                 r = gmap_mark_unmergeable();
2278                 mmap_write_unlock(current->mm);
2279                 if (r)
2280                         break;
2281
2282                 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2283                 if (r)
2284                         break;
2285
2286                 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2287                 if (r)
2288                         kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2289
2290                 /* we need to block service interrupts from now on */
2291                 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2292                 break;
2293         }
2294         case KVM_PV_DISABLE: {
2295                 r = -EINVAL;
2296                 if (!kvm_s390_pv_is_protected(kvm))
2297                         break;
2298
2299                 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2300                 /*
2301                  * If a CPU could not be destroyed, destroy VM will also fail.
2302                  * There is no point in trying to destroy it. Instead return
2303                  * the rc and rrc from the first CPU that failed destroying.
2304                  */
2305                 if (r)
2306                         break;
2307                 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2308
2309                 /* no need to block service interrupts any more */
2310                 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2311                 break;
2312         }
2313         case KVM_PV_SET_SEC_PARMS: {
2314                 struct kvm_s390_pv_sec_parm parms = {};
2315                 void *hdr;
2316
2317                 r = -EINVAL;
2318                 if (!kvm_s390_pv_is_protected(kvm))
2319                         break;
2320
2321                 r = -EFAULT;
2322                 if (copy_from_user(&parms, argp, sizeof(parms)))
2323                         break;
2324
2325                 /* Currently restricted to 8KB */
2326                 r = -EINVAL;
2327                 if (parms.length > PAGE_SIZE * 2)
2328                         break;
2329
2330                 r = -ENOMEM;
2331                 hdr = vmalloc(parms.length);
2332                 if (!hdr)
2333                         break;
2334
2335                 r = -EFAULT;
2336                 if (!copy_from_user(hdr, (void __user *)parms.origin,
2337                                     parms.length))
2338                         r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2339                                                       &cmd->rc, &cmd->rrc);
2340
2341                 vfree(hdr);
2342                 break;
2343         }
2344         case KVM_PV_UNPACK: {
2345                 struct kvm_s390_pv_unp unp = {};
2346
2347                 r = -EINVAL;
2348                 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2349                         break;
2350
2351                 r = -EFAULT;
2352                 if (copy_from_user(&unp, argp, sizeof(unp)))
2353                         break;
2354
2355                 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2356                                        &cmd->rc, &cmd->rrc);
2357                 break;
2358         }
2359         case KVM_PV_VERIFY: {
2360                 r = -EINVAL;
2361                 if (!kvm_s390_pv_is_protected(kvm))
2362                         break;
2363
2364                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2365                                   UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2366                 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2367                              cmd->rrc);
2368                 break;
2369         }
2370         case KVM_PV_PREP_RESET: {
2371                 r = -EINVAL;
2372                 if (!kvm_s390_pv_is_protected(kvm))
2373                         break;
2374
2375                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2376                                   UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2377                 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2378                              cmd->rc, cmd->rrc);
2379                 break;
2380         }
2381         case KVM_PV_UNSHARE_ALL: {
2382                 r = -EINVAL;
2383                 if (!kvm_s390_pv_is_protected(kvm))
2384                         break;
2385
2386                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2387                                   UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2388                 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2389                              cmd->rc, cmd->rrc);
2390                 break;
2391         }
2392         default:
2393                 r = -ENOTTY;
2394         }
2395         return r;
2396 }
2397
2398 static bool access_key_invalid(u8 access_key)
2399 {
2400         return access_key > 0xf;
2401 }
2402
2403 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2404 {
2405         void __user *uaddr = (void __user *)mop->buf;
2406         u64 supported_flags;
2407         void *tmpbuf = NULL;
2408         int r, srcu_idx;
2409
2410         supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
2411                           | KVM_S390_MEMOP_F_CHECK_ONLY;
2412         if (mop->flags & ~supported_flags || !mop->size)
2413                 return -EINVAL;
2414         if (mop->size > MEM_OP_MAX_SIZE)
2415                 return -E2BIG;
2416         if (kvm_s390_pv_is_protected(kvm))
2417                 return -EINVAL;
2418         if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
2419                 if (access_key_invalid(mop->key))
2420                         return -EINVAL;
2421         } else {
2422                 mop->key = 0;
2423         }
2424         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2425                 tmpbuf = vmalloc(mop->size);
2426                 if (!tmpbuf)
2427                         return -ENOMEM;
2428         }
2429
2430         srcu_idx = srcu_read_lock(&kvm->srcu);
2431
2432         if (kvm_is_error_gpa(kvm, mop->gaddr)) {
2433                 r = PGM_ADDRESSING;
2434                 goto out_unlock;
2435         }
2436
2437         switch (mop->op) {
2438         case KVM_S390_MEMOP_ABSOLUTE_READ: {
2439                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2440                         r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
2441                 } else {
2442                         r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2443                                                       mop->size, GACC_FETCH, mop->key);
2444                         if (r == 0) {
2445                                 if (copy_to_user(uaddr, tmpbuf, mop->size))
2446                                         r = -EFAULT;
2447                         }
2448                 }
2449                 break;
2450         }
2451         case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
2452                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2453                         r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
2454                 } else {
2455                         if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2456                                 r = -EFAULT;
2457                                 break;
2458                         }
2459                         r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2460                                                       mop->size, GACC_STORE, mop->key);
2461                 }
2462                 break;
2463         }
2464         default:
2465                 r = -EINVAL;
2466         }
2467
2468 out_unlock:
2469         srcu_read_unlock(&kvm->srcu, srcu_idx);
2470
2471         vfree(tmpbuf);
2472         return r;
2473 }
2474
2475 long kvm_arch_vm_ioctl(struct file *filp,
2476                        unsigned int ioctl, unsigned long arg)
2477 {
2478         struct kvm *kvm = filp->private_data;
2479         void __user *argp = (void __user *)arg;
2480         struct kvm_device_attr attr;
2481         int r;
2482
2483         switch (ioctl) {
2484         case KVM_S390_INTERRUPT: {
2485                 struct kvm_s390_interrupt s390int;
2486
2487                 r = -EFAULT;
2488                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2489                         break;
2490                 r = kvm_s390_inject_vm(kvm, &s390int);
2491                 break;
2492         }
2493         case KVM_CREATE_IRQCHIP: {
2494                 struct kvm_irq_routing_entry routing;
2495
2496                 r = -EINVAL;
2497                 if (kvm->arch.use_irqchip) {
2498                         /* Set up dummy routing. */
2499                         memset(&routing, 0, sizeof(routing));
2500                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2501                 }
2502                 break;
2503         }
2504         case KVM_SET_DEVICE_ATTR: {
2505                 r = -EFAULT;
2506                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2507                         break;
2508                 r = kvm_s390_vm_set_attr(kvm, &attr);
2509                 break;
2510         }
2511         case KVM_GET_DEVICE_ATTR: {
2512                 r = -EFAULT;
2513                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2514                         break;
2515                 r = kvm_s390_vm_get_attr(kvm, &attr);
2516                 break;
2517         }
2518         case KVM_HAS_DEVICE_ATTR: {
2519                 r = -EFAULT;
2520                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2521                         break;
2522                 r = kvm_s390_vm_has_attr(kvm, &attr);
2523                 break;
2524         }
2525         case KVM_S390_GET_SKEYS: {
2526                 struct kvm_s390_skeys args;
2527
2528                 r = -EFAULT;
2529                 if (copy_from_user(&args, argp,
2530                                    sizeof(struct kvm_s390_skeys)))
2531                         break;
2532                 r = kvm_s390_get_skeys(kvm, &args);
2533                 break;
2534         }
2535         case KVM_S390_SET_SKEYS: {
2536                 struct kvm_s390_skeys args;
2537
2538                 r = -EFAULT;
2539                 if (copy_from_user(&args, argp,
2540                                    sizeof(struct kvm_s390_skeys)))
2541                         break;
2542                 r = kvm_s390_set_skeys(kvm, &args);
2543                 break;
2544         }
2545         case KVM_S390_GET_CMMA_BITS: {
2546                 struct kvm_s390_cmma_log args;
2547
2548                 r = -EFAULT;
2549                 if (copy_from_user(&args, argp, sizeof(args)))
2550                         break;
2551                 mutex_lock(&kvm->slots_lock);
2552                 r = kvm_s390_get_cmma_bits(kvm, &args);
2553                 mutex_unlock(&kvm->slots_lock);
2554                 if (!r) {
2555                         r = copy_to_user(argp, &args, sizeof(args));
2556                         if (r)
2557                                 r = -EFAULT;
2558                 }
2559                 break;
2560         }
2561         case KVM_S390_SET_CMMA_BITS: {
2562                 struct kvm_s390_cmma_log args;
2563
2564                 r = -EFAULT;
2565                 if (copy_from_user(&args, argp, sizeof(args)))
2566                         break;
2567                 mutex_lock(&kvm->slots_lock);
2568                 r = kvm_s390_set_cmma_bits(kvm, &args);
2569                 mutex_unlock(&kvm->slots_lock);
2570                 break;
2571         }
2572         case KVM_S390_PV_COMMAND: {
2573                 struct kvm_pv_cmd args;
2574
2575                 /* protvirt means user cpu state */
2576                 kvm_s390_set_user_cpu_state_ctrl(kvm);
2577                 r = 0;
2578                 if (!is_prot_virt_host()) {
2579                         r = -EINVAL;
2580                         break;
2581                 }
2582                 if (copy_from_user(&args, argp, sizeof(args))) {
2583                         r = -EFAULT;
2584                         break;
2585                 }
2586                 if (args.flags) {
2587                         r = -EINVAL;
2588                         break;
2589                 }
2590                 mutex_lock(&kvm->lock);
2591                 r = kvm_s390_handle_pv(kvm, &args);
2592                 mutex_unlock(&kvm->lock);
2593                 if (copy_to_user(argp, &args, sizeof(args))) {
2594                         r = -EFAULT;
2595                         break;
2596                 }
2597                 break;
2598         }
2599         case KVM_S390_MEM_OP: {
2600                 struct kvm_s390_mem_op mem_op;
2601
2602                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2603                         r = kvm_s390_vm_mem_op(kvm, &mem_op);
2604                 else
2605                         r = -EFAULT;
2606                 break;
2607         }
2608         default:
2609                 r = -ENOTTY;
2610         }
2611
2612         return r;
2613 }
2614
2615 static int kvm_s390_apxa_installed(void)
2616 {
2617         struct ap_config_info info;
2618
2619         if (ap_instructions_available()) {
2620                 if (ap_qci(&info) == 0)
2621                         return info.apxa;
2622         }
2623
2624         return 0;
2625 }
2626
2627 /*
2628  * The format of the crypto control block (CRYCB) is specified in the 3 low
2629  * order bits of the CRYCB designation (CRYCBD) field as follows:
2630  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2631  *           AP extended addressing (APXA) facility are installed.
2632  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2633  * Format 2: Both the APXA and MSAX3 facilities are installed
2634  */
2635 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2636 {
2637         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2638
2639         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2640         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2641
2642         /* Check whether MSAX3 is installed */
2643         if (!test_kvm_facility(kvm, 76))
2644                 return;
2645
2646         if (kvm_s390_apxa_installed())
2647                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2648         else
2649                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2650 }
2651
2652 /*
2653  * kvm_arch_crypto_set_masks
2654  *
2655  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2656  *       to be set.
2657  * @apm: the mask identifying the accessible AP adapters
2658  * @aqm: the mask identifying the accessible AP domains
2659  * @adm: the mask identifying the accessible AP control domains
2660  *
2661  * Set the masks that identify the adapters, domains and control domains to
2662  * which the KVM guest is granted access.
2663  *
2664  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2665  *       function.
2666  */
2667 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2668                                unsigned long *aqm, unsigned long *adm)
2669 {
2670         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2671
2672         kvm_s390_vcpu_block_all(kvm);
2673
2674         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2675         case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2676                 memcpy(crycb->apcb1.apm, apm, 32);
2677                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2678                          apm[0], apm[1], apm[2], apm[3]);
2679                 memcpy(crycb->apcb1.aqm, aqm, 32);
2680                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2681                          aqm[0], aqm[1], aqm[2], aqm[3]);
2682                 memcpy(crycb->apcb1.adm, adm, 32);
2683                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2684                          adm[0], adm[1], adm[2], adm[3]);
2685                 break;
2686         case CRYCB_FORMAT1:
2687         case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2688                 memcpy(crycb->apcb0.apm, apm, 8);
2689                 memcpy(crycb->apcb0.aqm, aqm, 2);
2690                 memcpy(crycb->apcb0.adm, adm, 2);
2691                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2692                          apm[0], *((unsigned short *)aqm),
2693                          *((unsigned short *)adm));
2694                 break;
2695         default:        /* Can not happen */
2696                 break;
2697         }
2698
2699         /* recreate the shadow crycb for each vcpu */
2700         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2701         kvm_s390_vcpu_unblock_all(kvm);
2702 }
2703 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2704
2705 /*
2706  * kvm_arch_crypto_clear_masks
2707  *
2708  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2709  *       to be cleared.
2710  *
2711  * Clear the masks that identify the adapters, domains and control domains to
2712  * which the KVM guest is granted access.
2713  *
2714  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2715  *       function.
2716  */
2717 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2718 {
2719         kvm_s390_vcpu_block_all(kvm);
2720
2721         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2722                sizeof(kvm->arch.crypto.crycb->apcb0));
2723         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2724                sizeof(kvm->arch.crypto.crycb->apcb1));
2725
2726         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2727         /* recreate the shadow crycb for each vcpu */
2728         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2729         kvm_s390_vcpu_unblock_all(kvm);
2730 }
2731 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2732
2733 static u64 kvm_s390_get_initial_cpuid(void)
2734 {
2735         struct cpuid cpuid;
2736
2737         get_cpu_id(&cpuid);
2738         cpuid.version = 0xff;
2739         return *((u64 *) &cpuid);
2740 }
2741
2742 static void kvm_s390_crypto_init(struct kvm *kvm)
2743 {
2744         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2745         kvm_s390_set_crycb_format(kvm);
2746         init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2747
2748         if (!test_kvm_facility(kvm, 76))
2749                 return;
2750
2751         /* Enable AES/DEA protected key functions by default */
2752         kvm->arch.crypto.aes_kw = 1;
2753         kvm->arch.crypto.dea_kw = 1;
2754         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2755                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2756         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2757                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2758 }
2759
2760 static void sca_dispose(struct kvm *kvm)
2761 {
2762         if (kvm->arch.use_esca)
2763                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2764         else
2765                 free_page((unsigned long)(kvm->arch.sca));
2766         kvm->arch.sca = NULL;
2767 }
2768
2769 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2770 {
2771         gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2772         int i, rc;
2773         char debug_name[16];
2774         static unsigned long sca_offset;
2775
2776         rc = -EINVAL;
2777 #ifdef CONFIG_KVM_S390_UCONTROL
2778         if (type & ~KVM_VM_S390_UCONTROL)
2779                 goto out_err;
2780         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2781                 goto out_err;
2782 #else
2783         if (type)
2784                 goto out_err;
2785 #endif
2786
2787         rc = s390_enable_sie();
2788         if (rc)
2789                 goto out_err;
2790
2791         rc = -ENOMEM;
2792
2793         if (!sclp.has_64bscao)
2794                 alloc_flags |= GFP_DMA;
2795         rwlock_init(&kvm->arch.sca_lock);
2796         /* start with basic SCA */
2797         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2798         if (!kvm->arch.sca)
2799                 goto out_err;
2800         mutex_lock(&kvm_lock);
2801         sca_offset += 16;
2802         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2803                 sca_offset = 0;
2804         kvm->arch.sca = (struct bsca_block *)
2805                         ((char *) kvm->arch.sca + sca_offset);
2806         mutex_unlock(&kvm_lock);
2807
2808         sprintf(debug_name, "kvm-%u", current->pid);
2809
2810         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2811         if (!kvm->arch.dbf)
2812                 goto out_err;
2813
2814         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2815         kvm->arch.sie_page2 =
2816              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2817         if (!kvm->arch.sie_page2)
2818                 goto out_err;
2819
2820         kvm->arch.sie_page2->kvm = kvm;
2821         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2822
2823         for (i = 0; i < kvm_s390_fac_size(); i++) {
2824                 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2825                                               (kvm_s390_fac_base[i] |
2826                                                kvm_s390_fac_ext[i]);
2827                 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2828                                               kvm_s390_fac_base[i];
2829         }
2830         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2831
2832         /* we are always in czam mode - even on pre z14 machines */
2833         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2834         set_kvm_facility(kvm->arch.model.fac_list, 138);
2835         /* we emulate STHYI in kvm */
2836         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2837         set_kvm_facility(kvm->arch.model.fac_list, 74);
2838         if (MACHINE_HAS_TLB_GUEST) {
2839                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2840                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2841         }
2842
2843         if (css_general_characteristics.aiv && test_facility(65))
2844                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2845
2846         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2847         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2848
2849         kvm_s390_crypto_init(kvm);
2850
2851         mutex_init(&kvm->arch.float_int.ais_lock);
2852         spin_lock_init(&kvm->arch.float_int.lock);
2853         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2854                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2855         init_waitqueue_head(&kvm->arch.ipte_wq);
2856         mutex_init(&kvm->arch.ipte_mutex);
2857
2858         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2859         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2860
2861         if (type & KVM_VM_S390_UCONTROL) {
2862                 kvm->arch.gmap = NULL;
2863                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2864         } else {
2865                 if (sclp.hamax == U64_MAX)
2866                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2867                 else
2868                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2869                                                     sclp.hamax + 1);
2870                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2871                 if (!kvm->arch.gmap)
2872                         goto out_err;
2873                 kvm->arch.gmap->private = kvm;
2874                 kvm->arch.gmap->pfault_enabled = 0;
2875         }
2876
2877         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2878         kvm->arch.use_skf = sclp.has_skey;
2879         spin_lock_init(&kvm->arch.start_stop_lock);
2880         kvm_s390_vsie_init(kvm);
2881         if (use_gisa)
2882                 kvm_s390_gisa_init(kvm);
2883         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2884
2885         return 0;
2886 out_err:
2887         free_page((unsigned long)kvm->arch.sie_page2);
2888         debug_unregister(kvm->arch.dbf);
2889         sca_dispose(kvm);
2890         KVM_EVENT(3, "creation of vm failed: %d", rc);
2891         return rc;
2892 }
2893
2894 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2895 {
2896         u16 rc, rrc;
2897
2898         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2899         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2900         kvm_s390_clear_local_irqs(vcpu);
2901         kvm_clear_async_pf_completion_queue(vcpu);
2902         if (!kvm_is_ucontrol(vcpu->kvm))
2903                 sca_del_vcpu(vcpu);
2904
2905         if (kvm_is_ucontrol(vcpu->kvm))
2906                 gmap_remove(vcpu->arch.gmap);
2907
2908         if (vcpu->kvm->arch.use_cmma)
2909                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2910         /* We can not hold the vcpu mutex here, we are already dying */
2911         if (kvm_s390_pv_cpu_get_handle(vcpu))
2912                 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2913         free_page((unsigned long)(vcpu->arch.sie_block));
2914 }
2915
2916 static void kvm_free_vcpus(struct kvm *kvm)
2917 {
2918         unsigned int i;
2919         struct kvm_vcpu *vcpu;
2920
2921         kvm_for_each_vcpu(i, vcpu, kvm)
2922                 kvm_vcpu_destroy(vcpu);
2923
2924         mutex_lock(&kvm->lock);
2925         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2926                 kvm->vcpus[i] = NULL;
2927
2928         atomic_set(&kvm->online_vcpus, 0);
2929         mutex_unlock(&kvm->lock);
2930 }
2931
2932 void kvm_arch_destroy_vm(struct kvm *kvm)
2933 {
2934         u16 rc, rrc;
2935
2936         kvm_free_vcpus(kvm);
2937         sca_dispose(kvm);
2938         kvm_s390_gisa_destroy(kvm);
2939         /*
2940          * We are already at the end of life and kvm->lock is not taken.
2941          * This is ok as the file descriptor is closed by now and nobody
2942          * can mess with the pv state. To avoid lockdep_assert_held from
2943          * complaining we do not use kvm_s390_pv_is_protected.
2944          */
2945         if (kvm_s390_pv_get_handle(kvm))
2946                 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2947         debug_unregister(kvm->arch.dbf);
2948         free_page((unsigned long)kvm->arch.sie_page2);
2949         if (!kvm_is_ucontrol(kvm))
2950                 gmap_remove(kvm->arch.gmap);
2951         kvm_s390_destroy_adapters(kvm);
2952         kvm_s390_clear_float_irqs(kvm);
2953         kvm_s390_vsie_destroy(kvm);
2954         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2955 }
2956
2957 /* Section: vcpu related */
2958 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2959 {
2960         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2961         if (!vcpu->arch.gmap)
2962                 return -ENOMEM;
2963         vcpu->arch.gmap->private = vcpu->kvm;
2964
2965         return 0;
2966 }
2967
2968 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2969 {
2970         if (!kvm_s390_use_sca_entries())
2971                 return;
2972         read_lock(&vcpu->kvm->arch.sca_lock);
2973         if (vcpu->kvm->arch.use_esca) {
2974                 struct esca_block *sca = vcpu->kvm->arch.sca;
2975
2976                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2977                 sca->cpu[vcpu->vcpu_id].sda = 0;
2978         } else {
2979                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2980
2981                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2982                 sca->cpu[vcpu->vcpu_id].sda = 0;
2983         }
2984         read_unlock(&vcpu->kvm->arch.sca_lock);
2985 }
2986
2987 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2988 {
2989         if (!kvm_s390_use_sca_entries()) {
2990                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2991
2992                 /* we still need the basic sca for the ipte control */
2993                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2994                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2995                 return;
2996         }
2997         read_lock(&vcpu->kvm->arch.sca_lock);
2998         if (vcpu->kvm->arch.use_esca) {
2999                 struct esca_block *sca = vcpu->kvm->arch.sca;
3000
3001                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
3002                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
3003                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
3004                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3005                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3006         } else {
3007                 struct bsca_block *sca = vcpu->kvm->arch.sca;
3008
3009                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
3010                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
3011                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
3012                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3013         }
3014         read_unlock(&vcpu->kvm->arch.sca_lock);
3015 }
3016
3017 /* Basic SCA to Extended SCA data copy routines */
3018 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
3019 {
3020         d->sda = s->sda;
3021         d->sigp_ctrl.c = s->sigp_ctrl.c;
3022         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
3023 }
3024
3025 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
3026 {
3027         int i;
3028
3029         d->ipte_control = s->ipte_control;
3030         d->mcn[0] = s->mcn;
3031         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
3032                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
3033 }
3034
3035 static int sca_switch_to_extended(struct kvm *kvm)
3036 {
3037         struct bsca_block *old_sca = kvm->arch.sca;
3038         struct esca_block *new_sca;
3039         struct kvm_vcpu *vcpu;
3040         unsigned int vcpu_idx;
3041         u32 scaol, scaoh;
3042
3043         if (kvm->arch.use_esca)
3044                 return 0;
3045
3046         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
3047         if (!new_sca)
3048                 return -ENOMEM;
3049
3050         scaoh = (u32)((u64)(new_sca) >> 32);
3051         scaol = (u32)(u64)(new_sca) & ~0x3fU;
3052
3053         kvm_s390_vcpu_block_all(kvm);
3054         write_lock(&kvm->arch.sca_lock);
3055
3056         sca_copy_b_to_e(new_sca, old_sca);
3057
3058         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
3059                 vcpu->arch.sie_block->scaoh = scaoh;
3060                 vcpu->arch.sie_block->scaol = scaol;
3061                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3062         }
3063         kvm->arch.sca = new_sca;
3064         kvm->arch.use_esca = 1;
3065
3066         write_unlock(&kvm->arch.sca_lock);
3067         kvm_s390_vcpu_unblock_all(kvm);
3068
3069         free_page((unsigned long)old_sca);
3070
3071         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
3072                  old_sca, kvm->arch.sca);
3073         return 0;
3074 }
3075
3076 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3077 {
3078         int rc;
3079
3080         if (!kvm_s390_use_sca_entries()) {
3081                 if (id < KVM_MAX_VCPUS)
3082                         return true;
3083                 return false;
3084         }
3085         if (id < KVM_S390_BSCA_CPU_SLOTS)
3086                 return true;
3087         if (!sclp.has_esca || !sclp.has_64bscao)
3088                 return false;
3089
3090         mutex_lock(&kvm->lock);
3091         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3092         mutex_unlock(&kvm->lock);
3093
3094         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3095 }
3096
3097 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3098 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3099 {
3100         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3101         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3102         vcpu->arch.cputm_start = get_tod_clock_fast();
3103         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3104 }
3105
3106 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3107 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3108 {
3109         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3110         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3111         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3112         vcpu->arch.cputm_start = 0;
3113         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3114 }
3115
3116 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3117 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3118 {
3119         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3120         vcpu->arch.cputm_enabled = true;
3121         __start_cpu_timer_accounting(vcpu);
3122 }
3123
3124 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3125 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3126 {
3127         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3128         __stop_cpu_timer_accounting(vcpu);
3129         vcpu->arch.cputm_enabled = false;
3130 }
3131
3132 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3133 {
3134         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3135         __enable_cpu_timer_accounting(vcpu);
3136         preempt_enable();
3137 }
3138
3139 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3140 {
3141         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3142         __disable_cpu_timer_accounting(vcpu);
3143         preempt_enable();
3144 }
3145
3146 /* set the cpu timer - may only be called from the VCPU thread itself */
3147 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3148 {
3149         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3150         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3151         if (vcpu->arch.cputm_enabled)
3152                 vcpu->arch.cputm_start = get_tod_clock_fast();
3153         vcpu->arch.sie_block->cputm = cputm;
3154         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3155         preempt_enable();
3156 }
3157
3158 /* update and get the cpu timer - can also be called from other VCPU threads */
3159 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3160 {
3161         unsigned int seq;
3162         __u64 value;
3163
3164         if (unlikely(!vcpu->arch.cputm_enabled))
3165                 return vcpu->arch.sie_block->cputm;
3166
3167         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3168         do {
3169                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3170                 /*
3171                  * If the writer would ever execute a read in the critical
3172                  * section, e.g. in irq context, we have a deadlock.
3173                  */
3174                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3175                 value = vcpu->arch.sie_block->cputm;
3176                 /* if cputm_start is 0, accounting is being started/stopped */
3177                 if (likely(vcpu->arch.cputm_start))
3178                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3179         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3180         preempt_enable();
3181         return value;
3182 }
3183
3184 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3185 {
3186
3187         gmap_enable(vcpu->arch.enabled_gmap);
3188         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3189         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3190                 __start_cpu_timer_accounting(vcpu);
3191         vcpu->cpu = cpu;
3192 }
3193
3194 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3195 {
3196         vcpu->cpu = -1;
3197         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3198                 __stop_cpu_timer_accounting(vcpu);
3199         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3200         vcpu->arch.enabled_gmap = gmap_get_enabled();
3201         gmap_disable(vcpu->arch.enabled_gmap);
3202
3203 }
3204
3205 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3206 {
3207         mutex_lock(&vcpu->kvm->lock);
3208         preempt_disable();
3209         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3210         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3211         preempt_enable();
3212         mutex_unlock(&vcpu->kvm->lock);
3213         if (!kvm_is_ucontrol(vcpu->kvm)) {
3214                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3215                 sca_add_vcpu(vcpu);
3216         }
3217         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3218                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3219         /* make vcpu_load load the right gmap on the first trigger */
3220         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3221 }
3222
3223 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3224 {
3225         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3226             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3227                 return true;
3228         return false;
3229 }
3230
3231 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3232 {
3233         /* At least one ECC subfunction must be present */
3234         return kvm_has_pckmo_subfunc(kvm, 32) ||
3235                kvm_has_pckmo_subfunc(kvm, 33) ||
3236                kvm_has_pckmo_subfunc(kvm, 34) ||
3237                kvm_has_pckmo_subfunc(kvm, 40) ||
3238                kvm_has_pckmo_subfunc(kvm, 41);
3239
3240 }
3241
3242 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3243 {
3244         /*
3245          * If the AP instructions are not being interpreted and the MSAX3
3246          * facility is not configured for the guest, there is nothing to set up.
3247          */
3248         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3249                 return;
3250
3251         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3252         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3253         vcpu->arch.sie_block->eca &= ~ECA_APIE;
3254         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3255
3256         if (vcpu->kvm->arch.crypto.apie)
3257                 vcpu->arch.sie_block->eca |= ECA_APIE;
3258
3259         /* Set up protected key support */
3260         if (vcpu->kvm->arch.crypto.aes_kw) {
3261                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3262                 /* ecc is also wrapped with AES key */
3263                 if (kvm_has_pckmo_ecc(vcpu->kvm))
3264                         vcpu->arch.sie_block->ecd |= ECD_ECC;
3265         }
3266
3267         if (vcpu->kvm->arch.crypto.dea_kw)
3268                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3269 }
3270
3271 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3272 {
3273         free_page(vcpu->arch.sie_block->cbrlo);
3274         vcpu->arch.sie_block->cbrlo = 0;
3275 }
3276
3277 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3278 {
3279         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3280         if (!vcpu->arch.sie_block->cbrlo)
3281                 return -ENOMEM;
3282         return 0;
3283 }
3284
3285 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3286 {
3287         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3288
3289         vcpu->arch.sie_block->ibc = model->ibc;
3290         if (test_kvm_facility(vcpu->kvm, 7))
3291                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3292 }
3293
3294 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3295 {
3296         int rc = 0;
3297         u16 uvrc, uvrrc;
3298
3299         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3300                                                     CPUSTAT_SM |
3301                                                     CPUSTAT_STOPPED);
3302
3303         if (test_kvm_facility(vcpu->kvm, 78))
3304                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3305         else if (test_kvm_facility(vcpu->kvm, 8))
3306                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3307
3308         kvm_s390_vcpu_setup_model(vcpu);
3309
3310         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3311         if (MACHINE_HAS_ESOP)
3312                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3313         if (test_kvm_facility(vcpu->kvm, 9))
3314                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3315         if (test_kvm_facility(vcpu->kvm, 73))
3316                 vcpu->arch.sie_block->ecb |= ECB_TE;
3317         if (!kvm_is_ucontrol(vcpu->kvm))
3318                 vcpu->arch.sie_block->ecb |= ECB_SPECI;
3319
3320         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3321                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3322         if (test_kvm_facility(vcpu->kvm, 130))
3323                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3324         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3325         if (sclp.has_cei)
3326                 vcpu->arch.sie_block->eca |= ECA_CEI;
3327         if (sclp.has_ib)
3328                 vcpu->arch.sie_block->eca |= ECA_IB;
3329         if (sclp.has_siif)
3330                 vcpu->arch.sie_block->eca |= ECA_SII;
3331         if (sclp.has_sigpif)
3332                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3333         if (test_kvm_facility(vcpu->kvm, 129)) {
3334                 vcpu->arch.sie_block->eca |= ECA_VX;
3335                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3336         }
3337         if (test_kvm_facility(vcpu->kvm, 139))
3338                 vcpu->arch.sie_block->ecd |= ECD_MEF;
3339         if (test_kvm_facility(vcpu->kvm, 156))
3340                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3341         if (vcpu->arch.sie_block->gd) {
3342                 vcpu->arch.sie_block->eca |= ECA_AIV;
3343                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3344                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3345         }
3346         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3347                                         | SDNXC;
3348         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3349
3350         if (sclp.has_kss)
3351                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3352         else
3353                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3354
3355         if (vcpu->kvm->arch.use_cmma) {
3356                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3357                 if (rc)
3358                         return rc;
3359         }
3360         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3361         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3362
3363         vcpu->arch.sie_block->hpid = HPID_KVM;
3364
3365         kvm_s390_vcpu_crypto_setup(vcpu);
3366
3367         mutex_lock(&vcpu->kvm->lock);
3368         if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3369                 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3370                 if (rc)
3371                         kvm_s390_vcpu_unsetup_cmma(vcpu);
3372         }
3373         mutex_unlock(&vcpu->kvm->lock);
3374
3375         return rc;
3376 }
3377
3378 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3379 {
3380         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3381                 return -EINVAL;
3382         return 0;
3383 }
3384
3385 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3386 {
3387         struct sie_page *sie_page;
3388         int rc;
3389
3390         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3391         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3392         if (!sie_page)
3393                 return -ENOMEM;
3394
3395         vcpu->arch.sie_block = &sie_page->sie_block;
3396         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3397
3398         /* the real guest size will always be smaller than msl */
3399         vcpu->arch.sie_block->mso = 0;
3400         vcpu->arch.sie_block->msl = sclp.hamax;
3401
3402         vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3403         spin_lock_init(&vcpu->arch.local_int.lock);
3404         vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
3405         seqcount_init(&vcpu->arch.cputm_seqcount);
3406
3407         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3408         kvm_clear_async_pf_completion_queue(vcpu);
3409         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3410                                     KVM_SYNC_GPRS |
3411                                     KVM_SYNC_ACRS |
3412                                     KVM_SYNC_CRS |
3413                                     KVM_SYNC_ARCH0 |
3414                                     KVM_SYNC_PFAULT |
3415                                     KVM_SYNC_DIAG318;
3416         kvm_s390_set_prefix(vcpu, 0);
3417         if (test_kvm_facility(vcpu->kvm, 64))
3418                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3419         if (test_kvm_facility(vcpu->kvm, 82))
3420                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3421         if (test_kvm_facility(vcpu->kvm, 133))
3422                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3423         if (test_kvm_facility(vcpu->kvm, 156))
3424                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3425         /* fprs can be synchronized via vrs, even if the guest has no vx. With
3426          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3427          */
3428         if (MACHINE_HAS_VX)
3429                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3430         else
3431                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3432
3433         if (kvm_is_ucontrol(vcpu->kvm)) {
3434                 rc = __kvm_ucontrol_vcpu_init(vcpu);
3435                 if (rc)
3436                         goto out_free_sie_block;
3437         }
3438
3439         VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3440                  vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3441         trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3442
3443         rc = kvm_s390_vcpu_setup(vcpu);
3444         if (rc)
3445                 goto out_ucontrol_uninit;
3446         return 0;
3447
3448 out_ucontrol_uninit:
3449         if (kvm_is_ucontrol(vcpu->kvm))
3450                 gmap_remove(vcpu->arch.gmap);
3451 out_free_sie_block:
3452         free_page((unsigned long)(vcpu->arch.sie_block));
3453         return rc;
3454 }
3455
3456 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3457 {
3458         clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3459         return kvm_s390_vcpu_has_irq(vcpu, 0);
3460 }
3461
3462 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3463 {
3464         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3465 }
3466
3467 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3468 {
3469         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3470         exit_sie(vcpu);
3471 }
3472
3473 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3474 {
3475         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3476 }
3477
3478 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3479 {
3480         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3481         exit_sie(vcpu);
3482 }
3483
3484 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3485 {
3486         return atomic_read(&vcpu->arch.sie_block->prog20) &
3487                (PROG_BLOCK_SIE | PROG_REQUEST);
3488 }
3489
3490 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3491 {
3492         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3493 }
3494
3495 /*
3496  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3497  * If the CPU is not running (e.g. waiting as idle) the function will
3498  * return immediately. */
3499 void exit_sie(struct kvm_vcpu *vcpu)
3500 {
3501         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3502         kvm_s390_vsie_kick(vcpu);
3503         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3504                 cpu_relax();
3505 }
3506
3507 /* Kick a guest cpu out of SIE to process a request synchronously */
3508 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3509 {
3510         kvm_make_request(req, vcpu);
3511         kvm_s390_vcpu_request(vcpu);
3512 }
3513
3514 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3515                               unsigned long end)
3516 {
3517         struct kvm *kvm = gmap->private;
3518         struct kvm_vcpu *vcpu;
3519         unsigned long prefix;
3520         int i;
3521
3522         if (gmap_is_shadow(gmap))
3523                 return;
3524         if (start >= 1UL << 31)
3525                 /* We are only interested in prefix pages */
3526                 return;
3527         kvm_for_each_vcpu(i, vcpu, kvm) {
3528                 /* match against both prefix pages */
3529                 prefix = kvm_s390_get_prefix(vcpu);
3530                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3531                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3532                                    start, end);
3533                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3534                 }
3535         }
3536 }
3537
3538 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3539 {
3540         /* do not poll with more than halt_poll_max_steal percent of steal time */
3541         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3542             READ_ONCE(halt_poll_max_steal)) {
3543                 vcpu->stat.halt_no_poll_steal++;
3544                 return true;
3545         }
3546         return false;
3547 }
3548
3549 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3550 {
3551         /* kvm common code refers to this, but never calls it */
3552         BUG();
3553         return 0;
3554 }
3555
3556 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3557                                            struct kvm_one_reg *reg)
3558 {
3559         int r = -EINVAL;
3560
3561         switch (reg->id) {
3562         case KVM_REG_S390_TODPR:
3563                 r = put_user(vcpu->arch.sie_block->todpr,
3564                              (u32 __user *)reg->addr);
3565                 break;
3566         case KVM_REG_S390_EPOCHDIFF:
3567                 r = put_user(vcpu->arch.sie_block->epoch,
3568                              (u64 __user *)reg->addr);
3569                 break;
3570         case KVM_REG_S390_CPU_TIMER:
3571                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3572                              (u64 __user *)reg->addr);
3573                 break;
3574         case KVM_REG_S390_CLOCK_COMP:
3575                 r = put_user(vcpu->arch.sie_block->ckc,
3576                              (u64 __user *)reg->addr);
3577                 break;
3578         case KVM_REG_S390_PFTOKEN:
3579                 r = put_user(vcpu->arch.pfault_token,
3580                              (u64 __user *)reg->addr);
3581                 break;
3582         case KVM_REG_S390_PFCOMPARE:
3583                 r = put_user(vcpu->arch.pfault_compare,
3584                              (u64 __user *)reg->addr);
3585                 break;
3586         case KVM_REG_S390_PFSELECT:
3587                 r = put_user(vcpu->arch.pfault_select,
3588                              (u64 __user *)reg->addr);
3589                 break;
3590         case KVM_REG_S390_PP:
3591                 r = put_user(vcpu->arch.sie_block->pp,
3592                              (u64 __user *)reg->addr);
3593                 break;
3594         case KVM_REG_S390_GBEA:
3595                 r = put_user(vcpu->arch.sie_block->gbea,
3596                              (u64 __user *)reg->addr);
3597                 break;
3598         default:
3599                 break;
3600         }
3601
3602         return r;
3603 }
3604
3605 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3606                                            struct kvm_one_reg *reg)
3607 {
3608         int r = -EINVAL;
3609         __u64 val;
3610
3611         switch (reg->id) {
3612         case KVM_REG_S390_TODPR:
3613                 r = get_user(vcpu->arch.sie_block->todpr,
3614                              (u32 __user *)reg->addr);
3615                 break;
3616         case KVM_REG_S390_EPOCHDIFF:
3617                 r = get_user(vcpu->arch.sie_block->epoch,
3618                              (u64 __user *)reg->addr);
3619                 break;
3620         case KVM_REG_S390_CPU_TIMER:
3621                 r = get_user(val, (u64 __user *)reg->addr);
3622                 if (!r)
3623                         kvm_s390_set_cpu_timer(vcpu, val);
3624                 break;
3625         case KVM_REG_S390_CLOCK_COMP:
3626                 r = get_user(vcpu->arch.sie_block->ckc,
3627                              (u64 __user *)reg->addr);
3628                 break;
3629         case KVM_REG_S390_PFTOKEN:
3630                 r = get_user(vcpu->arch.pfault_token,
3631                              (u64 __user *)reg->addr);
3632                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3633                         kvm_clear_async_pf_completion_queue(vcpu);
3634                 break;
3635         case KVM_REG_S390_PFCOMPARE:
3636                 r = get_user(vcpu->arch.pfault_compare,
3637                              (u64 __user *)reg->addr);
3638                 break;
3639         case KVM_REG_S390_PFSELECT:
3640                 r = get_user(vcpu->arch.pfault_select,
3641                              (u64 __user *)reg->addr);
3642                 break;
3643         case KVM_REG_S390_PP:
3644                 r = get_user(vcpu->arch.sie_block->pp,
3645                              (u64 __user *)reg->addr);
3646                 break;
3647         case KVM_REG_S390_GBEA:
3648                 r = get_user(vcpu->arch.sie_block->gbea,
3649                              (u64 __user *)reg->addr);
3650                 break;
3651         default:
3652                 break;
3653         }
3654
3655         return r;
3656 }
3657
3658 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3659 {
3660         vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3661         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3662         memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3663
3664         kvm_clear_async_pf_completion_queue(vcpu);
3665         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3666                 kvm_s390_vcpu_stop(vcpu);
3667         kvm_s390_clear_local_irqs(vcpu);
3668 }
3669
3670 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3671 {
3672         /* Initial reset is a superset of the normal reset */
3673         kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3674
3675         /*
3676          * This equals initial cpu reset in pop, but we don't switch to ESA.
3677          * We do not only reset the internal data, but also ...
3678          */
3679         vcpu->arch.sie_block->gpsw.mask = 0;
3680         vcpu->arch.sie_block->gpsw.addr = 0;
3681         kvm_s390_set_prefix(vcpu, 0);
3682         kvm_s390_set_cpu_timer(vcpu, 0);
3683         vcpu->arch.sie_block->ckc = 0;
3684         memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3685         vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3686         vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3687
3688         /* ... the data in sync regs */
3689         memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3690         vcpu->run->s.regs.ckc = 0;
3691         vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3692         vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3693         vcpu->run->psw_addr = 0;
3694         vcpu->run->psw_mask = 0;
3695         vcpu->run->s.regs.todpr = 0;
3696         vcpu->run->s.regs.cputm = 0;
3697         vcpu->run->s.regs.ckc = 0;
3698         vcpu->run->s.regs.pp = 0;
3699         vcpu->run->s.regs.gbea = 1;
3700         vcpu->run->s.regs.fpc = 0;
3701         /*
3702          * Do not reset these registers in the protected case, as some of
3703          * them are overlayed and they are not accessible in this case
3704          * anyway.
3705          */
3706         if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3707                 vcpu->arch.sie_block->gbea = 1;
3708                 vcpu->arch.sie_block->pp = 0;
3709                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3710                 vcpu->arch.sie_block->todpr = 0;
3711         }
3712 }
3713
3714 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3715 {
3716         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3717
3718         /* Clear reset is a superset of the initial reset */
3719         kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3720
3721         memset(&regs->gprs, 0, sizeof(regs->gprs));
3722         memset(&regs->vrs, 0, sizeof(regs->vrs));
3723         memset(&regs->acrs, 0, sizeof(regs->acrs));
3724         memset(&regs->gscb, 0, sizeof(regs->gscb));
3725
3726         regs->etoken = 0;
3727         regs->etoken_extension = 0;
3728 }
3729
3730 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3731 {
3732         vcpu_load(vcpu);
3733         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3734         vcpu_put(vcpu);
3735         return 0;
3736 }
3737
3738 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3739 {
3740         vcpu_load(vcpu);
3741         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3742         vcpu_put(vcpu);
3743         return 0;
3744 }
3745
3746 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3747                                   struct kvm_sregs *sregs)
3748 {
3749         vcpu_load(vcpu);
3750
3751         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3752         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3753
3754         vcpu_put(vcpu);
3755         return 0;
3756 }
3757
3758 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3759                                   struct kvm_sregs *sregs)
3760 {
3761         vcpu_load(vcpu);
3762
3763         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3764         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3765
3766         vcpu_put(vcpu);
3767         return 0;
3768 }
3769
3770 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3771 {
3772         int ret = 0;
3773
3774         vcpu_load(vcpu);
3775
3776         if (test_fp_ctl(fpu->fpc)) {
3777                 ret = -EINVAL;
3778                 goto out;
3779         }
3780         vcpu->run->s.regs.fpc = fpu->fpc;
3781         if (MACHINE_HAS_VX)
3782                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3783                                  (freg_t *) fpu->fprs);
3784         else
3785                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3786
3787 out:
3788         vcpu_put(vcpu);
3789         return ret;
3790 }
3791
3792 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3793 {
3794         vcpu_load(vcpu);
3795
3796         /* make sure we have the latest values */
3797         save_fpu_regs();
3798         if (MACHINE_HAS_VX)
3799                 convert_vx_to_fp((freg_t *) fpu->fprs,
3800                                  (__vector128 *) vcpu->run->s.regs.vrs);
3801         else
3802                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3803         fpu->fpc = vcpu->run->s.regs.fpc;
3804
3805         vcpu_put(vcpu);
3806         return 0;
3807 }
3808
3809 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3810 {
3811         int rc = 0;
3812
3813         if (!is_vcpu_stopped(vcpu))
3814                 rc = -EBUSY;
3815         else {
3816                 vcpu->run->psw_mask = psw.mask;
3817                 vcpu->run->psw_addr = psw.addr;
3818         }
3819         return rc;
3820 }
3821
3822 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3823                                   struct kvm_translation *tr)
3824 {
3825         return -EINVAL; /* not implemented yet */
3826 }
3827
3828 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3829                               KVM_GUESTDBG_USE_HW_BP | \
3830                               KVM_GUESTDBG_ENABLE)
3831
3832 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3833                                         struct kvm_guest_debug *dbg)
3834 {
3835         int rc = 0;
3836
3837         vcpu_load(vcpu);
3838
3839         vcpu->guest_debug = 0;
3840         kvm_s390_clear_bp_data(vcpu);
3841
3842         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3843                 rc = -EINVAL;
3844                 goto out;
3845         }
3846         if (!sclp.has_gpere) {
3847                 rc = -EINVAL;
3848                 goto out;
3849         }
3850
3851         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3852                 vcpu->guest_debug = dbg->control;
3853                 /* enforce guest PER */
3854                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3855
3856                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3857                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3858         } else {
3859                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3860                 vcpu->arch.guestdbg.last_bp = 0;
3861         }
3862
3863         if (rc) {
3864                 vcpu->guest_debug = 0;
3865                 kvm_s390_clear_bp_data(vcpu);
3866                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3867         }
3868
3869 out:
3870         vcpu_put(vcpu);
3871         return rc;
3872 }
3873
3874 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3875                                     struct kvm_mp_state *mp_state)
3876 {
3877         int ret;
3878
3879         vcpu_load(vcpu);
3880
3881         /* CHECK_STOP and LOAD are not supported yet */
3882         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3883                                       KVM_MP_STATE_OPERATING;
3884
3885         vcpu_put(vcpu);
3886         return ret;
3887 }
3888
3889 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3890                                     struct kvm_mp_state *mp_state)
3891 {
3892         int rc = 0;
3893
3894         vcpu_load(vcpu);
3895
3896         /* user space knows about this interface - let it control the state */
3897         kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
3898
3899         switch (mp_state->mp_state) {
3900         case KVM_MP_STATE_STOPPED:
3901                 rc = kvm_s390_vcpu_stop(vcpu);
3902                 break;
3903         case KVM_MP_STATE_OPERATING:
3904                 rc = kvm_s390_vcpu_start(vcpu);
3905                 break;
3906         case KVM_MP_STATE_LOAD:
3907                 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3908                         rc = -ENXIO;
3909                         break;
3910                 }
3911                 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3912                 break;
3913         case KVM_MP_STATE_CHECK_STOP:
3914                 fallthrough;    /* CHECK_STOP and LOAD are not supported yet */
3915         default:
3916                 rc = -ENXIO;
3917         }
3918
3919         vcpu_put(vcpu);
3920         return rc;
3921 }
3922
3923 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3924 {
3925         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3926 }
3927
3928 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3929 {
3930 retry:
3931         kvm_s390_vcpu_request_handled(vcpu);
3932         if (!kvm_request_pending(vcpu))
3933                 return 0;
3934         /*
3935          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3936          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3937          * This ensures that the ipte instruction for this request has
3938          * already finished. We might race against a second unmapper that
3939          * wants to set the blocking bit. Lets just retry the request loop.
3940          */
3941         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3942                 int rc;
3943                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3944                                           kvm_s390_get_prefix(vcpu),
3945                                           PAGE_SIZE * 2, PROT_WRITE);
3946                 if (rc) {
3947                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3948                         return rc;
3949                 }
3950                 goto retry;
3951         }
3952
3953         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3954                 vcpu->arch.sie_block->ihcpu = 0xffff;
3955                 goto retry;
3956         }
3957
3958         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3959                 if (!ibs_enabled(vcpu)) {
3960                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3961                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3962                 }
3963                 goto retry;
3964         }
3965
3966         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3967                 if (ibs_enabled(vcpu)) {
3968                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3969                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3970                 }
3971                 goto retry;
3972         }
3973
3974         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3975                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3976                 goto retry;
3977         }
3978
3979         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3980                 /*
3981                  * Disable CMM virtualization; we will emulate the ESSA
3982                  * instruction manually, in order to provide additional
3983                  * functionalities needed for live migration.
3984                  */
3985                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3986                 goto retry;
3987         }
3988
3989         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3990                 /*
3991                  * Re-enable CMM virtualization if CMMA is available and
3992                  * CMM has been used.
3993                  */
3994                 if ((vcpu->kvm->arch.use_cmma) &&
3995                     (vcpu->kvm->mm->context.uses_cmm))
3996                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3997                 goto retry;
3998         }
3999
4000         /* nothing to do, just clear the request */
4001         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
4002         /* we left the vsie handler, nothing to do, just clear the request */
4003         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
4004
4005         return 0;
4006 }
4007
4008 void kvm_s390_set_tod_clock(struct kvm *kvm,
4009                             const struct kvm_s390_vm_tod_clock *gtod)
4010 {
4011         struct kvm_vcpu *vcpu;
4012         union tod_clock clk;
4013         int i;
4014
4015         mutex_lock(&kvm->lock);
4016         preempt_disable();
4017
4018         store_tod_clock_ext(&clk);
4019
4020         kvm->arch.epoch = gtod->tod - clk.tod;
4021         kvm->arch.epdx = 0;
4022         if (test_kvm_facility(kvm, 139)) {
4023                 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
4024                 if (kvm->arch.epoch > gtod->tod)
4025                         kvm->arch.epdx -= 1;
4026         }
4027
4028         kvm_s390_vcpu_block_all(kvm);
4029         kvm_for_each_vcpu(i, vcpu, kvm) {
4030                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
4031                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
4032         }
4033
4034         kvm_s390_vcpu_unblock_all(kvm);
4035         preempt_enable();
4036         mutex_unlock(&kvm->lock);
4037 }
4038
4039 /**
4040  * kvm_arch_fault_in_page - fault-in guest page if necessary
4041  * @vcpu: The corresponding virtual cpu
4042  * @gpa: Guest physical address
4043  * @writable: Whether the page should be writable or not
4044  *
4045  * Make sure that a guest page has been faulted-in on the host.
4046  *
4047  * Return: Zero on success, negative error code otherwise.
4048  */
4049 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
4050 {
4051         return gmap_fault(vcpu->arch.gmap, gpa,
4052                           writable ? FAULT_FLAG_WRITE : 0);
4053 }
4054
4055 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
4056                                       unsigned long token)
4057 {
4058         struct kvm_s390_interrupt inti;
4059         struct kvm_s390_irq irq;
4060
4061         if (start_token) {
4062                 irq.u.ext.ext_params2 = token;
4063                 irq.type = KVM_S390_INT_PFAULT_INIT;
4064                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
4065         } else {
4066                 inti.type = KVM_S390_INT_PFAULT_DONE;
4067                 inti.parm64 = token;
4068                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4069         }
4070 }
4071
4072 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4073                                      struct kvm_async_pf *work)
4074 {
4075         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4076         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4077
4078         return true;
4079 }
4080
4081 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4082                                  struct kvm_async_pf *work)
4083 {
4084         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4085         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4086 }
4087
4088 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4089                                struct kvm_async_pf *work)
4090 {
4091         /* s390 will always inject the page directly */
4092 }
4093
4094 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4095 {
4096         /*
4097          * s390 will always inject the page directly,
4098          * but we still want check_async_completion to cleanup
4099          */
4100         return true;
4101 }
4102
4103 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4104 {
4105         hva_t hva;
4106         struct kvm_arch_async_pf arch;
4107
4108         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4109                 return false;
4110         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4111             vcpu->arch.pfault_compare)
4112                 return false;
4113         if (psw_extint_disabled(vcpu))
4114                 return false;
4115         if (kvm_s390_vcpu_has_irq(vcpu, 0))
4116                 return false;
4117         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4118                 return false;
4119         if (!vcpu->arch.gmap->pfault_enabled)
4120                 return false;
4121
4122         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4123         hva += current->thread.gmap_addr & ~PAGE_MASK;
4124         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4125                 return false;
4126
4127         return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4128 }
4129
4130 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4131 {
4132         int rc, cpuflags;
4133
4134         /*
4135          * On s390 notifications for arriving pages will be delivered directly
4136          * to the guest but the house keeping for completed pfaults is
4137          * handled outside the worker.
4138          */
4139         kvm_check_async_pf_completion(vcpu);
4140
4141         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4142         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4143
4144         if (need_resched())
4145                 schedule();
4146
4147         if (!kvm_is_ucontrol(vcpu->kvm)) {
4148                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4149                 if (rc)
4150                         return rc;
4151         }
4152
4153         rc = kvm_s390_handle_requests(vcpu);
4154         if (rc)
4155                 return rc;
4156
4157         if (guestdbg_enabled(vcpu)) {
4158                 kvm_s390_backup_guest_per_regs(vcpu);
4159                 kvm_s390_patch_guest_per_regs(vcpu);
4160         }
4161
4162         clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4163
4164         vcpu->arch.sie_block->icptcode = 0;
4165         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4166         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4167         trace_kvm_s390_sie_enter(vcpu, cpuflags);
4168
4169         return 0;
4170 }
4171
4172 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4173 {
4174         struct kvm_s390_pgm_info pgm_info = {
4175                 .code = PGM_ADDRESSING,
4176         };
4177         u8 opcode, ilen;
4178         int rc;
4179
4180         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4181         trace_kvm_s390_sie_fault(vcpu);
4182
4183         /*
4184          * We want to inject an addressing exception, which is defined as a
4185          * suppressing or terminating exception. However, since we came here
4186          * by a DAT access exception, the PSW still points to the faulting
4187          * instruction since DAT exceptions are nullifying. So we've got
4188          * to look up the current opcode to get the length of the instruction
4189          * to be able to forward the PSW.
4190          */
4191         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4192         ilen = insn_length(opcode);
4193         if (rc < 0) {
4194                 return rc;
4195         } else if (rc) {
4196                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4197                  * Forward by arbitrary ilc, injection will take care of
4198                  * nullification if necessary.
4199                  */
4200                 pgm_info = vcpu->arch.pgm;
4201                 ilen = 4;
4202         }
4203         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4204         kvm_s390_forward_psw(vcpu, ilen);
4205         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4206 }
4207
4208 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4209 {
4210         struct mcck_volatile_info *mcck_info;
4211         struct sie_page *sie_page;
4212
4213         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4214                    vcpu->arch.sie_block->icptcode);
4215         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4216
4217         if (guestdbg_enabled(vcpu))
4218                 kvm_s390_restore_guest_per_regs(vcpu);
4219
4220         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4221         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4222
4223         if (exit_reason == -EINTR) {
4224                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4225                 sie_page = container_of(vcpu->arch.sie_block,
4226                                         struct sie_page, sie_block);
4227                 mcck_info = &sie_page->mcck_info;
4228                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4229                 return 0;
4230         }
4231
4232         if (vcpu->arch.sie_block->icptcode > 0) {
4233                 int rc = kvm_handle_sie_intercept(vcpu);
4234
4235                 if (rc != -EOPNOTSUPP)
4236                         return rc;
4237                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4238                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4239                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4240                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4241                 return -EREMOTE;
4242         } else if (exit_reason != -EFAULT) {
4243                 vcpu->stat.exit_null++;
4244                 return 0;
4245         } else if (kvm_is_ucontrol(vcpu->kvm)) {
4246                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4247                 vcpu->run->s390_ucontrol.trans_exc_code =
4248                                                 current->thread.gmap_addr;
4249                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4250                 return -EREMOTE;
4251         } else if (current->thread.gmap_pfault) {
4252                 trace_kvm_s390_major_guest_pfault(vcpu);
4253                 current->thread.gmap_pfault = 0;
4254                 if (kvm_arch_setup_async_pf(vcpu))
4255                         return 0;
4256                 vcpu->stat.pfault_sync++;
4257                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4258         }
4259         return vcpu_post_run_fault_in_sie(vcpu);
4260 }
4261
4262 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4263 static int __vcpu_run(struct kvm_vcpu *vcpu)
4264 {
4265         int rc, exit_reason;
4266         struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4267
4268         /*
4269          * We try to hold kvm->srcu during most of vcpu_run (except when run-
4270          * ning the guest), so that memslots (and other stuff) are protected
4271          */
4272         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4273
4274         do {
4275                 rc = vcpu_pre_run(vcpu);
4276                 if (rc)
4277                         break;
4278
4279                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4280                 /*
4281                  * As PF_VCPU will be used in fault handler, between
4282                  * guest_enter and guest_exit should be no uaccess.
4283                  */
4284                 local_irq_disable();
4285                 guest_enter_irqoff();
4286                 __disable_cpu_timer_accounting(vcpu);
4287                 local_irq_enable();
4288                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4289                         memcpy(sie_page->pv_grregs,
4290                                vcpu->run->s.regs.gprs,
4291                                sizeof(sie_page->pv_grregs));
4292                 }
4293                 if (test_cpu_flag(CIF_FPU))
4294                         load_fpu_regs();
4295                 exit_reason = sie64a(vcpu->arch.sie_block,
4296                                      vcpu->run->s.regs.gprs);
4297                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4298                         memcpy(vcpu->run->s.regs.gprs,
4299                                sie_page->pv_grregs,
4300                                sizeof(sie_page->pv_grregs));
4301                         /*
4302                          * We're not allowed to inject interrupts on intercepts
4303                          * that leave the guest state in an "in-between" state
4304                          * where the next SIE entry will do a continuation.
4305                          * Fence interrupts in our "internal" PSW.
4306                          */
4307                         if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4308                             vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4309                                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4310                         }
4311                 }
4312                 local_irq_disable();
4313                 __enable_cpu_timer_accounting(vcpu);
4314                 guest_exit_irqoff();
4315                 local_irq_enable();
4316                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4317
4318                 rc = vcpu_post_run(vcpu, exit_reason);
4319         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4320
4321         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4322         return rc;
4323 }
4324
4325 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4326 {
4327         struct kvm_run *kvm_run = vcpu->run;
4328         struct runtime_instr_cb *riccb;
4329         struct gs_cb *gscb;
4330
4331         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4332         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4333         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4334         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4335         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4336                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4337                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4338                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4339         }
4340         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4341                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4342                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4343                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4344                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4345                         kvm_clear_async_pf_completion_queue(vcpu);
4346         }
4347         if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4348                 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4349                 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4350                 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
4351         }
4352         /*
4353          * If userspace sets the riccb (e.g. after migration) to a valid state,
4354          * we should enable RI here instead of doing the lazy enablement.
4355          */
4356         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4357             test_kvm_facility(vcpu->kvm, 64) &&
4358             riccb->v &&
4359             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4360                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4361                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4362         }
4363         /*
4364          * If userspace sets the gscb (e.g. after migration) to non-zero,
4365          * we should enable GS here instead of doing the lazy enablement.
4366          */
4367         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4368             test_kvm_facility(vcpu->kvm, 133) &&
4369             gscb->gssm &&
4370             !vcpu->arch.gs_enabled) {
4371                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4372                 vcpu->arch.sie_block->ecb |= ECB_GS;
4373                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4374                 vcpu->arch.gs_enabled = 1;
4375         }
4376         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4377             test_kvm_facility(vcpu->kvm, 82)) {
4378                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4379                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4380         }
4381         if (MACHINE_HAS_GS) {
4382                 preempt_disable();
4383                 __ctl_set_bit(2, 4);
4384                 if (current->thread.gs_cb) {
4385                         vcpu->arch.host_gscb = current->thread.gs_cb;
4386                         save_gs_cb(vcpu->arch.host_gscb);
4387                 }
4388                 if (vcpu->arch.gs_enabled) {
4389                         current->thread.gs_cb = (struct gs_cb *)
4390                                                 &vcpu->run->s.regs.gscb;
4391                         restore_gs_cb(current->thread.gs_cb);
4392                 }
4393                 preempt_enable();
4394         }
4395         /* SIE will load etoken directly from SDNX and therefore kvm_run */
4396 }
4397
4398 static void sync_regs(struct kvm_vcpu *vcpu)
4399 {
4400         struct kvm_run *kvm_run = vcpu->run;
4401
4402         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4403                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4404         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4405                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4406                 /* some control register changes require a tlb flush */
4407                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4408         }
4409         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4410                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4411                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4412         }
4413         save_access_regs(vcpu->arch.host_acrs);
4414         restore_access_regs(vcpu->run->s.regs.acrs);
4415         /* save host (userspace) fprs/vrs */
4416         save_fpu_regs();
4417         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4418         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4419         if (MACHINE_HAS_VX)
4420                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4421         else
4422                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4423         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4424         if (test_fp_ctl(current->thread.fpu.fpc))
4425                 /* User space provided an invalid FPC, let's clear it */
4426                 current->thread.fpu.fpc = 0;
4427
4428         /* Sync fmt2 only data */
4429         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4430                 sync_regs_fmt2(vcpu);
4431         } else {
4432                 /*
4433                  * In several places we have to modify our internal view to
4434                  * not do things that are disallowed by the ultravisor. For
4435                  * example we must not inject interrupts after specific exits
4436                  * (e.g. 112 prefix page not secure). We do this by turning
4437                  * off the machine check, external and I/O interrupt bits
4438                  * of our PSW copy. To avoid getting validity intercepts, we
4439                  * do only accept the condition code from userspace.
4440                  */
4441                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4442                 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4443                                                    PSW_MASK_CC;
4444         }
4445
4446         kvm_run->kvm_dirty_regs = 0;
4447 }
4448
4449 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4450 {
4451         struct kvm_run *kvm_run = vcpu->run;
4452
4453         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4454         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4455         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4456         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4457         kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4458         if (MACHINE_HAS_GS) {
4459                 preempt_disable();
4460                 __ctl_set_bit(2, 4);
4461                 if (vcpu->arch.gs_enabled)
4462                         save_gs_cb(current->thread.gs_cb);
4463                 current->thread.gs_cb = vcpu->arch.host_gscb;
4464                 restore_gs_cb(vcpu->arch.host_gscb);
4465                 if (!vcpu->arch.host_gscb)
4466                         __ctl_clear_bit(2, 4);
4467                 vcpu->arch.host_gscb = NULL;
4468                 preempt_enable();
4469         }
4470         /* SIE will save etoken directly into SDNX and therefore kvm_run */
4471 }
4472
4473 static void store_regs(struct kvm_vcpu *vcpu)
4474 {
4475         struct kvm_run *kvm_run = vcpu->run;
4476
4477         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4478         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4479         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4480         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4481         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4482         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4483         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4484         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4485         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4486         save_access_regs(vcpu->run->s.regs.acrs);
4487         restore_access_regs(vcpu->arch.host_acrs);
4488         /* Save guest register state */
4489         save_fpu_regs();
4490         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4491         /* Restore will be done lazily at return */
4492         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4493         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4494         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4495                 store_regs_fmt2(vcpu);
4496 }
4497
4498 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4499 {
4500         struct kvm_run *kvm_run = vcpu->run;
4501         int rc;
4502
4503         if (kvm_run->immediate_exit)
4504                 return -EINTR;
4505
4506         if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4507             kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4508                 return -EINVAL;
4509
4510         vcpu_load(vcpu);
4511
4512         if (guestdbg_exit_pending(vcpu)) {
4513                 kvm_s390_prepare_debug_exit(vcpu);
4514                 rc = 0;
4515                 goto out;
4516         }
4517
4518         kvm_sigset_activate(vcpu);
4519
4520         /*
4521          * no need to check the return value of vcpu_start as it can only have
4522          * an error for protvirt, but protvirt means user cpu state
4523          */
4524         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4525                 kvm_s390_vcpu_start(vcpu);
4526         } else if (is_vcpu_stopped(vcpu)) {
4527                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4528                                    vcpu->vcpu_id);
4529                 rc = -EINVAL;
4530                 goto out;
4531         }
4532
4533         sync_regs(vcpu);
4534         enable_cpu_timer_accounting(vcpu);
4535
4536         might_fault();
4537         rc = __vcpu_run(vcpu);
4538
4539         if (signal_pending(current) && !rc) {
4540                 kvm_run->exit_reason = KVM_EXIT_INTR;
4541                 rc = -EINTR;
4542         }
4543
4544         if (guestdbg_exit_pending(vcpu) && !rc)  {
4545                 kvm_s390_prepare_debug_exit(vcpu);
4546                 rc = 0;
4547         }
4548
4549         if (rc == -EREMOTE) {
4550                 /* userspace support is needed, kvm_run has been prepared */
4551                 rc = 0;
4552         }
4553
4554         disable_cpu_timer_accounting(vcpu);
4555         store_regs(vcpu);
4556
4557         kvm_sigset_deactivate(vcpu);
4558
4559         vcpu->stat.exit_userspace++;
4560 out:
4561         vcpu_put(vcpu);
4562         return rc;
4563 }
4564
4565 /*
4566  * store status at address
4567  * we use have two special cases:
4568  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4569  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4570  */
4571 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4572 {
4573         unsigned char archmode = 1;
4574         freg_t fprs[NUM_FPRS];
4575         unsigned int px;
4576         u64 clkcomp, cputm;
4577         int rc;
4578
4579         px = kvm_s390_get_prefix(vcpu);
4580         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4581                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4582                         return -EFAULT;
4583                 gpa = 0;
4584         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4585                 if (write_guest_real(vcpu, 163, &archmode, 1))
4586                         return -EFAULT;
4587                 gpa = px;
4588         } else
4589                 gpa -= __LC_FPREGS_SAVE_AREA;
4590
4591         /* manually convert vector registers if necessary */
4592         if (MACHINE_HAS_VX) {
4593                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4594                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4595                                      fprs, 128);
4596         } else {
4597                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4598                                      vcpu->run->s.regs.fprs, 128);
4599         }
4600         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4601                               vcpu->run->s.regs.gprs, 128);
4602         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4603                               &vcpu->arch.sie_block->gpsw, 16);
4604         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4605                               &px, 4);
4606         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4607                               &vcpu->run->s.regs.fpc, 4);
4608         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4609                               &vcpu->arch.sie_block->todpr, 4);
4610         cputm = kvm_s390_get_cpu_timer(vcpu);
4611         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4612                               &cputm, 8);
4613         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4614         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4615                               &clkcomp, 8);
4616         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4617                               &vcpu->run->s.regs.acrs, 64);
4618         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4619                               &vcpu->arch.sie_block->gcr, 128);
4620         return rc ? -EFAULT : 0;
4621 }
4622
4623 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4624 {
4625         /*
4626          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4627          * switch in the run ioctl. Let's update our copies before we save
4628          * it into the save area
4629          */
4630         save_fpu_regs();
4631         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4632         save_access_regs(vcpu->run->s.regs.acrs);
4633
4634         return kvm_s390_store_status_unloaded(vcpu, addr);
4635 }
4636
4637 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4638 {
4639         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4640         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4641 }
4642
4643 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4644 {
4645         unsigned int i;
4646         struct kvm_vcpu *vcpu;
4647
4648         kvm_for_each_vcpu(i, vcpu, kvm) {
4649                 __disable_ibs_on_vcpu(vcpu);
4650         }
4651 }
4652
4653 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4654 {
4655         if (!sclp.has_ibs)
4656                 return;
4657         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4658         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4659 }
4660
4661 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4662 {
4663         int i, online_vcpus, r = 0, started_vcpus = 0;
4664
4665         if (!is_vcpu_stopped(vcpu))
4666                 return 0;
4667
4668         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4669         /* Only one cpu at a time may enter/leave the STOPPED state. */
4670         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4671         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4672
4673         /* Let's tell the UV that we want to change into the operating state */
4674         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4675                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4676                 if (r) {
4677                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4678                         return r;
4679                 }
4680         }
4681
4682         for (i = 0; i < online_vcpus; i++) {
4683                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4684                         started_vcpus++;
4685         }
4686
4687         if (started_vcpus == 0) {
4688                 /* we're the only active VCPU -> speed it up */
4689                 __enable_ibs_on_vcpu(vcpu);
4690         } else if (started_vcpus == 1) {
4691                 /*
4692                  * As we are starting a second VCPU, we have to disable
4693                  * the IBS facility on all VCPUs to remove potentially
4694                  * outstanding ENABLE requests.
4695                  */
4696                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4697         }
4698
4699         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4700         /*
4701          * The real PSW might have changed due to a RESTART interpreted by the
4702          * ultravisor. We block all interrupts and let the next sie exit
4703          * refresh our view.
4704          */
4705         if (kvm_s390_pv_cpu_is_protected(vcpu))
4706                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4707         /*
4708          * Another VCPU might have used IBS while we were offline.
4709          * Let's play safe and flush the VCPU at startup.
4710          */
4711         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4712         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4713         return 0;
4714 }
4715
4716 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4717 {
4718         int i, online_vcpus, r = 0, started_vcpus = 0;
4719         struct kvm_vcpu *started_vcpu = NULL;
4720
4721         if (is_vcpu_stopped(vcpu))
4722                 return 0;
4723
4724         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4725         /* Only one cpu at a time may enter/leave the STOPPED state. */
4726         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4727         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4728
4729         /* Let's tell the UV that we want to change into the stopped state */
4730         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4731                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4732                 if (r) {
4733                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4734                         return r;
4735                 }
4736         }
4737
4738         /*
4739          * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4740          * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4741          * have been fully processed. This will ensure that the VCPU
4742          * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4743          */
4744         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4745         kvm_s390_clear_stop_irq(vcpu);
4746
4747         __disable_ibs_on_vcpu(vcpu);
4748
4749         for (i = 0; i < online_vcpus; i++) {
4750                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4751                         started_vcpus++;
4752                         started_vcpu = vcpu->kvm->vcpus[i];
4753                 }
4754         }
4755
4756         if (started_vcpus == 1) {
4757                 /*
4758                  * As we only have one VCPU left, we want to enable the
4759                  * IBS facility for that VCPU to speed it up.
4760                  */
4761                 __enable_ibs_on_vcpu(started_vcpu);
4762         }
4763
4764         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4765         return 0;
4766 }
4767
4768 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4769                                      struct kvm_enable_cap *cap)
4770 {
4771         int r;
4772
4773         if (cap->flags)
4774                 return -EINVAL;
4775
4776         switch (cap->cap) {
4777         case KVM_CAP_S390_CSS_SUPPORT:
4778                 if (!vcpu->kvm->arch.css_support) {
4779                         vcpu->kvm->arch.css_support = 1;
4780                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4781                         trace_kvm_s390_enable_css(vcpu->kvm);
4782                 }
4783                 r = 0;
4784                 break;
4785         default:
4786                 r = -EINVAL;
4787                 break;
4788         }
4789         return r;
4790 }
4791
4792 static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
4793                                   struct kvm_s390_mem_op *mop)
4794 {
4795         void __user *uaddr = (void __user *)mop->buf;
4796         int r = 0;
4797
4798         if (mop->flags || !mop->size)
4799                 return -EINVAL;
4800         if (mop->size + mop->sida_offset < mop->size)
4801                 return -EINVAL;
4802         if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4803                 return -E2BIG;
4804         if (!kvm_s390_pv_cpu_is_protected(vcpu))
4805                 return -EINVAL;
4806
4807         switch (mop->op) {
4808         case KVM_S390_MEMOP_SIDA_READ:
4809                 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4810                                  mop->sida_offset), mop->size))
4811                         r = -EFAULT;
4812
4813                 break;
4814         case KVM_S390_MEMOP_SIDA_WRITE:
4815                 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4816                                    mop->sida_offset), uaddr, mop->size))
4817                         r = -EFAULT;
4818                 break;
4819         }
4820         return r;
4821 }
4822
4823 static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
4824                                  struct kvm_s390_mem_op *mop)
4825 {
4826         void __user *uaddr = (void __user *)mop->buf;
4827         void *tmpbuf = NULL;
4828         int r = 0;
4829         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4830                                     | KVM_S390_MEMOP_F_CHECK_ONLY
4831                                     | KVM_S390_MEMOP_F_SKEY_PROTECTION;
4832
4833         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4834                 return -EINVAL;
4835         if (mop->size > MEM_OP_MAX_SIZE)
4836                 return -E2BIG;
4837         if (kvm_s390_pv_cpu_is_protected(vcpu))
4838                 return -EINVAL;
4839         if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
4840                 if (access_key_invalid(mop->key))
4841                         return -EINVAL;
4842         } else {
4843                 mop->key = 0;
4844         }
4845         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4846                 tmpbuf = vmalloc(mop->size);
4847                 if (!tmpbuf)
4848                         return -ENOMEM;
4849         }
4850
4851         switch (mop->op) {
4852         case KVM_S390_MEMOP_LOGICAL_READ:
4853                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4854                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
4855                                             GACC_FETCH, mop->key);
4856                         break;
4857                 }
4858                 r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
4859                                         mop->size, mop->key);
4860                 if (r == 0) {
4861                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4862                                 r = -EFAULT;
4863                 }
4864                 break;
4865         case KVM_S390_MEMOP_LOGICAL_WRITE:
4866                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4867                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
4868                                             GACC_STORE, mop->key);
4869                         break;
4870                 }
4871                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4872                         r = -EFAULT;
4873                         break;
4874                 }
4875                 r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
4876                                          mop->size, mop->key);
4877                 break;
4878         }
4879
4880         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4881                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4882
4883         vfree(tmpbuf);
4884         return r;
4885 }
4886
4887 static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
4888                                      struct kvm_s390_mem_op *mop)
4889 {
4890         int r, srcu_idx;
4891
4892         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4893
4894         switch (mop->op) {
4895         case KVM_S390_MEMOP_LOGICAL_READ:
4896         case KVM_S390_MEMOP_LOGICAL_WRITE:
4897                 r = kvm_s390_vcpu_mem_op(vcpu, mop);
4898                 break;
4899         case KVM_S390_MEMOP_SIDA_READ:
4900         case KVM_S390_MEMOP_SIDA_WRITE:
4901                 /* we are locked against sida going away by the vcpu->mutex */
4902                 r = kvm_s390_vcpu_sida_op(vcpu, mop);
4903                 break;
4904         default:
4905                 r = -EINVAL;
4906         }
4907
4908         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4909         return r;
4910 }
4911
4912 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4913                                unsigned int ioctl, unsigned long arg)
4914 {
4915         struct kvm_vcpu *vcpu = filp->private_data;
4916         void __user *argp = (void __user *)arg;
4917
4918         switch (ioctl) {
4919         case KVM_S390_IRQ: {
4920                 struct kvm_s390_irq s390irq;
4921
4922                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4923                         return -EFAULT;
4924                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4925         }
4926         case KVM_S390_INTERRUPT: {
4927                 struct kvm_s390_interrupt s390int;
4928                 struct kvm_s390_irq s390irq = {};
4929
4930                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4931                         return -EFAULT;
4932                 if (s390int_to_s390irq(&s390int, &s390irq))
4933                         return -EINVAL;
4934                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4935         }
4936         }
4937         return -ENOIOCTLCMD;
4938 }
4939
4940 long kvm_arch_vcpu_ioctl(struct file *filp,
4941                          unsigned int ioctl, unsigned long arg)
4942 {
4943         struct kvm_vcpu *vcpu = filp->private_data;
4944         void __user *argp = (void __user *)arg;
4945         int idx;
4946         long r;
4947         u16 rc, rrc;
4948
4949         vcpu_load(vcpu);
4950
4951         switch (ioctl) {
4952         case KVM_S390_STORE_STATUS:
4953                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4954                 r = kvm_s390_store_status_unloaded(vcpu, arg);
4955                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4956                 break;
4957         case KVM_S390_SET_INITIAL_PSW: {
4958                 psw_t psw;
4959
4960                 r = -EFAULT;
4961                 if (copy_from_user(&psw, argp, sizeof(psw)))
4962                         break;
4963                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4964                 break;
4965         }
4966         case KVM_S390_CLEAR_RESET:
4967                 r = 0;
4968                 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4969                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4970                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4971                                           UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4972                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4973                                    rc, rrc);
4974                 }
4975                 break;
4976         case KVM_S390_INITIAL_RESET:
4977                 r = 0;
4978                 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4979                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4980                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4981                                           UVC_CMD_CPU_RESET_INITIAL,
4982                                           &rc, &rrc);
4983                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4984                                    rc, rrc);
4985                 }
4986                 break;
4987         case KVM_S390_NORMAL_RESET:
4988                 r = 0;
4989                 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4990                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4991                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4992                                           UVC_CMD_CPU_RESET, &rc, &rrc);
4993                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4994                                    rc, rrc);
4995                 }
4996                 break;
4997         case KVM_SET_ONE_REG:
4998         case KVM_GET_ONE_REG: {
4999                 struct kvm_one_reg reg;
5000                 r = -EINVAL;
5001                 if (kvm_s390_pv_cpu_is_protected(vcpu))
5002                         break;
5003                 r = -EFAULT;
5004                 if (copy_from_user(&reg, argp, sizeof(reg)))
5005                         break;
5006                 if (ioctl == KVM_SET_ONE_REG)
5007                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
5008                 else
5009                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
5010                 break;
5011         }
5012 #ifdef CONFIG_KVM_S390_UCONTROL
5013         case KVM_S390_UCAS_MAP: {
5014                 struct kvm_s390_ucas_mapping ucasmap;
5015
5016                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5017                         r = -EFAULT;
5018                         break;
5019                 }
5020
5021                 if (!kvm_is_ucontrol(vcpu->kvm)) {
5022                         r = -EINVAL;
5023                         break;
5024                 }
5025
5026                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
5027                                      ucasmap.vcpu_addr, ucasmap.length);
5028                 break;
5029         }
5030         case KVM_S390_UCAS_UNMAP: {
5031                 struct kvm_s390_ucas_mapping ucasmap;
5032
5033                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5034                         r = -EFAULT;
5035                         break;
5036                 }
5037
5038                 if (!kvm_is_ucontrol(vcpu->kvm)) {
5039                         r = -EINVAL;
5040                         break;
5041                 }
5042
5043                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
5044                         ucasmap.length);
5045                 break;
5046         }
5047 #endif
5048         case KVM_S390_VCPU_FAULT: {
5049                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
5050                 break;
5051         }
5052         case KVM_ENABLE_CAP:
5053         {
5054                 struct kvm_enable_cap cap;
5055                 r = -EFAULT;
5056                 if (copy_from_user(&cap, argp, sizeof(cap)))
5057                         break;
5058                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5059                 break;
5060         }
5061         case KVM_S390_MEM_OP: {
5062                 struct kvm_s390_mem_op mem_op;
5063
5064                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
5065                         r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
5066                 else
5067                         r = -EFAULT;
5068                 break;
5069         }
5070         case KVM_S390_SET_IRQ_STATE: {
5071                 struct kvm_s390_irq_state irq_state;
5072
5073                 r = -EFAULT;
5074                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5075                         break;
5076                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
5077                     irq_state.len == 0 ||
5078                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
5079                         r = -EINVAL;
5080                         break;
5081                 }
5082                 /* do not use irq_state.flags, it will break old QEMUs */
5083                 r = kvm_s390_set_irq_state(vcpu,
5084                                            (void __user *) irq_state.buf,
5085                                            irq_state.len);
5086                 break;
5087         }
5088         case KVM_S390_GET_IRQ_STATE: {
5089                 struct kvm_s390_irq_state irq_state;
5090
5091                 r = -EFAULT;
5092                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5093                         break;
5094                 if (irq_state.len == 0) {
5095                         r = -EINVAL;
5096                         break;
5097                 }
5098                 /* do not use irq_state.flags, it will break old QEMUs */
5099                 r = kvm_s390_get_irq_state(vcpu,
5100                                            (__u8 __user *)  irq_state.buf,
5101                                            irq_state.len);
5102                 break;
5103         }
5104         default:
5105                 r = -ENOTTY;
5106         }
5107
5108         vcpu_put(vcpu);
5109         return r;
5110 }
5111
5112 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5113 {
5114 #ifdef CONFIG_KVM_S390_UCONTROL
5115         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5116                  && (kvm_is_ucontrol(vcpu->kvm))) {
5117                 vmf->page = virt_to_page(vcpu->arch.sie_block);
5118                 get_page(vmf->page);
5119                 return 0;
5120         }
5121 #endif
5122         return VM_FAULT_SIGBUS;
5123 }
5124
5125 /* Section: memory related */
5126 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5127                                    struct kvm_memory_slot *memslot,
5128                                    const struct kvm_userspace_memory_region *mem,
5129                                    enum kvm_mr_change change)
5130 {
5131         /* A few sanity checks. We can have memory slots which have to be
5132            located/ended at a segment boundary (1MB). The memory in userland is
5133            ok to be fragmented into various different vmas. It is okay to mmap()
5134            and munmap() stuff in this slot after doing this call at any time */
5135
5136         if (mem->userspace_addr & 0xffffful)
5137                 return -EINVAL;
5138
5139         if (mem->memory_size & 0xffffful)
5140                 return -EINVAL;
5141
5142         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
5143                 return -EINVAL;
5144
5145         /* When we are protected, we should not change the memory slots */
5146         if (kvm_s390_pv_get_handle(kvm))
5147                 return -EINVAL;
5148         return 0;
5149 }
5150
5151 void kvm_arch_commit_memory_region(struct kvm *kvm,
5152                                 const struct kvm_userspace_memory_region *mem,
5153                                 struct kvm_memory_slot *old,
5154                                 const struct kvm_memory_slot *new,
5155                                 enum kvm_mr_change change)
5156 {
5157         int rc = 0;
5158
5159         switch (change) {
5160         case KVM_MR_DELETE:
5161                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5162                                         old->npages * PAGE_SIZE);
5163                 break;
5164         case KVM_MR_MOVE:
5165                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5166                                         old->npages * PAGE_SIZE);
5167                 if (rc)
5168                         break;
5169                 fallthrough;
5170         case KVM_MR_CREATE:
5171                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5172                                       mem->guest_phys_addr, mem->memory_size);
5173                 break;
5174         case KVM_MR_FLAGS_ONLY:
5175                 break;
5176         default:
5177                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5178         }
5179         if (rc)
5180                 pr_warn("failed to commit memory region\n");
5181         return;
5182 }
5183
5184 static inline unsigned long nonhyp_mask(int i)
5185 {
5186         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5187
5188         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5189 }
5190
5191 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5192 {
5193         vcpu->valid_wakeup = false;
5194 }
5195
5196 static int __init kvm_s390_init(void)
5197 {
5198         int i;
5199
5200         if (!sclp.has_sief2) {
5201                 pr_info("SIE is not available\n");
5202                 return -ENODEV;
5203         }
5204
5205         if (nested && hpage) {
5206                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5207                 return -EINVAL;
5208         }
5209
5210         for (i = 0; i < 16; i++)
5211                 kvm_s390_fac_base[i] |=
5212                         stfle_fac_list[i] & nonhyp_mask(i);
5213
5214         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5215 }
5216
5217 static void __exit kvm_s390_exit(void)
5218 {
5219         kvm_exit();
5220 }
5221
5222 module_init(kvm_s390_init);
5223 module_exit(kvm_s390_exit);
5224
5225 /*
5226  * Enable autoloading of the kvm module.
5227  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5228  * since x86 takes a different approach.
5229  */
5230 #include <linux/miscdevice.h>
5231 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5232 MODULE_ALIAS("devname:kvm");