]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blob - arch/s390/kvm/kvm-s390.c
UBUNTU: Ubuntu-5.15.0-39.42
[mirror_ubuntu-jammy-kernel.git] / arch / s390 / kvm / kvm-s390.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * hosting IBM Z kernel virtual machines (s390x)
4 *
5 * Copyright IBM Corp. 2008, 2020
6 *
7 * Author(s): Carsten Otte <cotte@de.ibm.com>
8 * Christian Borntraeger <borntraeger@de.ibm.com>
9 * Heiko Carstens <heiko.carstens@de.ibm.com>
10 * Christian Ehrhardt <ehrhardt@de.ibm.com>
11 * Jason J. Herne <jjherne@us.ibm.com>
12 */
13
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
35
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include <asm/fpu/api.h>
49 #include "kvm-s390.h"
50 #include "gaccess.h"
51
52 #define CREATE_TRACE_POINTS
53 #include "trace.h"
54 #include "trace-s390.h"
55
56 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
57 #define LOCAL_IRQS 32
58 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
59 (KVM_MAX_VCPUS + LOCAL_IRQS))
60
61 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
62 KVM_GENERIC_VM_STATS(),
63 STATS_DESC_COUNTER(VM, inject_io),
64 STATS_DESC_COUNTER(VM, inject_float_mchk),
65 STATS_DESC_COUNTER(VM, inject_pfault_done),
66 STATS_DESC_COUNTER(VM, inject_service_signal),
67 STATS_DESC_COUNTER(VM, inject_virtio)
68 };
69
70 const struct kvm_stats_header kvm_vm_stats_header = {
71 .name_size = KVM_STATS_NAME_SIZE,
72 .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
73 .id_offset = sizeof(struct kvm_stats_header),
74 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
75 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
76 sizeof(kvm_vm_stats_desc),
77 };
78
79 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
80 KVM_GENERIC_VCPU_STATS(),
81 STATS_DESC_COUNTER(VCPU, exit_userspace),
82 STATS_DESC_COUNTER(VCPU, exit_null),
83 STATS_DESC_COUNTER(VCPU, exit_external_request),
84 STATS_DESC_COUNTER(VCPU, exit_io_request),
85 STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
86 STATS_DESC_COUNTER(VCPU, exit_stop_request),
87 STATS_DESC_COUNTER(VCPU, exit_validity),
88 STATS_DESC_COUNTER(VCPU, exit_instruction),
89 STATS_DESC_COUNTER(VCPU, exit_pei),
90 STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
91 STATS_DESC_COUNTER(VCPU, instruction_lctl),
92 STATS_DESC_COUNTER(VCPU, instruction_lctlg),
93 STATS_DESC_COUNTER(VCPU, instruction_stctl),
94 STATS_DESC_COUNTER(VCPU, instruction_stctg),
95 STATS_DESC_COUNTER(VCPU, exit_program_interruption),
96 STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
97 STATS_DESC_COUNTER(VCPU, exit_operation_exception),
98 STATS_DESC_COUNTER(VCPU, deliver_ckc),
99 STATS_DESC_COUNTER(VCPU, deliver_cputm),
100 STATS_DESC_COUNTER(VCPU, deliver_external_call),
101 STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
102 STATS_DESC_COUNTER(VCPU, deliver_service_signal),
103 STATS_DESC_COUNTER(VCPU, deliver_virtio),
104 STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
105 STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
106 STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
107 STATS_DESC_COUNTER(VCPU, deliver_program),
108 STATS_DESC_COUNTER(VCPU, deliver_io),
109 STATS_DESC_COUNTER(VCPU, deliver_machine_check),
110 STATS_DESC_COUNTER(VCPU, exit_wait_state),
111 STATS_DESC_COUNTER(VCPU, inject_ckc),
112 STATS_DESC_COUNTER(VCPU, inject_cputm),
113 STATS_DESC_COUNTER(VCPU, inject_external_call),
114 STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
115 STATS_DESC_COUNTER(VCPU, inject_mchk),
116 STATS_DESC_COUNTER(VCPU, inject_pfault_init),
117 STATS_DESC_COUNTER(VCPU, inject_program),
118 STATS_DESC_COUNTER(VCPU, inject_restart),
119 STATS_DESC_COUNTER(VCPU, inject_set_prefix),
120 STATS_DESC_COUNTER(VCPU, inject_stop_signal),
121 STATS_DESC_COUNTER(VCPU, instruction_epsw),
122 STATS_DESC_COUNTER(VCPU, instruction_gs),
123 STATS_DESC_COUNTER(VCPU, instruction_io_other),
124 STATS_DESC_COUNTER(VCPU, instruction_lpsw),
125 STATS_DESC_COUNTER(VCPU, instruction_lpswe),
126 STATS_DESC_COUNTER(VCPU, instruction_pfmf),
127 STATS_DESC_COUNTER(VCPU, instruction_ptff),
128 STATS_DESC_COUNTER(VCPU, instruction_sck),
129 STATS_DESC_COUNTER(VCPU, instruction_sckpf),
130 STATS_DESC_COUNTER(VCPU, instruction_stidp),
131 STATS_DESC_COUNTER(VCPU, instruction_spx),
132 STATS_DESC_COUNTER(VCPU, instruction_stpx),
133 STATS_DESC_COUNTER(VCPU, instruction_stap),
134 STATS_DESC_COUNTER(VCPU, instruction_iske),
135 STATS_DESC_COUNTER(VCPU, instruction_ri),
136 STATS_DESC_COUNTER(VCPU, instruction_rrbe),
137 STATS_DESC_COUNTER(VCPU, instruction_sske),
138 STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
139 STATS_DESC_COUNTER(VCPU, instruction_stsi),
140 STATS_DESC_COUNTER(VCPU, instruction_stfl),
141 STATS_DESC_COUNTER(VCPU, instruction_tb),
142 STATS_DESC_COUNTER(VCPU, instruction_tpi),
143 STATS_DESC_COUNTER(VCPU, instruction_tprot),
144 STATS_DESC_COUNTER(VCPU, instruction_tsch),
145 STATS_DESC_COUNTER(VCPU, instruction_sie),
146 STATS_DESC_COUNTER(VCPU, instruction_essa),
147 STATS_DESC_COUNTER(VCPU, instruction_sthyi),
148 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
149 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
150 STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
151 STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
152 STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
153 STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
154 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
155 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
156 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
157 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
158 STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
159 STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
160 STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
161 STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
162 STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
163 STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
164 STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
165 STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
166 STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
167 STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
168 STATS_DESC_COUNTER(VCPU, diag_9c_forward),
169 STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
170 STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
171 STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
172 STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
173 STATS_DESC_COUNTER(VCPU, pfault_sync)
174 };
175
176 const struct kvm_stats_header kvm_vcpu_stats_header = {
177 .name_size = KVM_STATS_NAME_SIZE,
178 .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
179 .id_offset = sizeof(struct kvm_stats_header),
180 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
181 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
182 sizeof(kvm_vcpu_stats_desc),
183 };
184
185 /* allow nested virtualization in KVM (if enabled by user space) */
186 static int nested;
187 module_param(nested, int, S_IRUGO);
188 MODULE_PARM_DESC(nested, "Nested virtualization support");
189
190 /* allow 1m huge page guest backing, if !nested */
191 static int hpage;
192 module_param(hpage, int, 0444);
193 MODULE_PARM_DESC(hpage, "1m huge page backing support");
194
195 /* maximum percentage of steal time for polling. >100 is treated like 100 */
196 static u8 halt_poll_max_steal = 10;
197 module_param(halt_poll_max_steal, byte, 0644);
198 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
199
200 /* if set to true, the GISA will be initialized and used if available */
201 static bool use_gisa = true;
202 module_param(use_gisa, bool, 0644);
203 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
204
205 /* maximum diag9c forwarding per second */
206 unsigned int diag9c_forwarding_hz;
207 module_param(diag9c_forwarding_hz, uint, 0644);
208 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
209
210 /*
211 * For now we handle at most 16 double words as this is what the s390 base
212 * kernel handles and stores in the prefix page. If we ever need to go beyond
213 * this, this requires changes to code, but the external uapi can stay.
214 */
215 #define SIZE_INTERNAL 16
216
217 /*
218 * Base feature mask that defines default mask for facilities. Consists of the
219 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
220 */
221 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
222 /*
223 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
224 * and defines the facilities that can be enabled via a cpu model.
225 */
226 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
227
228 static unsigned long kvm_s390_fac_size(void)
229 {
230 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
231 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
232 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
233 sizeof(stfle_fac_list));
234
235 return SIZE_INTERNAL;
236 }
237
238 /* available cpu features supported by kvm */
239 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
240 /* available subfunctions indicated via query / "test bit" */
241 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
242
243 static struct gmap_notifier gmap_notifier;
244 static struct gmap_notifier vsie_gmap_notifier;
245 debug_info_t *kvm_s390_dbf;
246 debug_info_t *kvm_s390_dbf_uv;
247
248 /* Section: not file related */
249 int kvm_arch_hardware_enable(void)
250 {
251 /* every s390 is virtualization enabled ;-) */
252 return 0;
253 }
254
255 int kvm_arch_check_processor_compat(void *opaque)
256 {
257 return 0;
258 }
259
260 /* forward declarations */
261 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
262 unsigned long end);
263 static int sca_switch_to_extended(struct kvm *kvm);
264
265 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
266 {
267 u8 delta_idx = 0;
268
269 /*
270 * The TOD jumps by delta, we have to compensate this by adding
271 * -delta to the epoch.
272 */
273 delta = -delta;
274
275 /* sign-extension - we're adding to signed values below */
276 if ((s64)delta < 0)
277 delta_idx = -1;
278
279 scb->epoch += delta;
280 if (scb->ecd & ECD_MEF) {
281 scb->epdx += delta_idx;
282 if (scb->epoch < delta)
283 scb->epdx += 1;
284 }
285 }
286
287 /*
288 * This callback is executed during stop_machine(). All CPUs are therefore
289 * temporarily stopped. In order not to change guest behavior, we have to
290 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
291 * so a CPU won't be stopped while calculating with the epoch.
292 */
293 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
294 void *v)
295 {
296 struct kvm *kvm;
297 struct kvm_vcpu *vcpu;
298 int i;
299 unsigned long long *delta = v;
300
301 list_for_each_entry(kvm, &vm_list, vm_list) {
302 kvm_for_each_vcpu(i, vcpu, kvm) {
303 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
304 if (i == 0) {
305 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
306 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
307 }
308 if (vcpu->arch.cputm_enabled)
309 vcpu->arch.cputm_start += *delta;
310 if (vcpu->arch.vsie_block)
311 kvm_clock_sync_scb(vcpu->arch.vsie_block,
312 *delta);
313 }
314 }
315 return NOTIFY_OK;
316 }
317
318 static struct notifier_block kvm_clock_notifier = {
319 .notifier_call = kvm_clock_sync,
320 };
321
322 int kvm_arch_hardware_setup(void *opaque)
323 {
324 gmap_notifier.notifier_call = kvm_gmap_notifier;
325 gmap_register_pte_notifier(&gmap_notifier);
326 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
327 gmap_register_pte_notifier(&vsie_gmap_notifier);
328 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
329 &kvm_clock_notifier);
330 return 0;
331 }
332
333 void kvm_arch_hardware_unsetup(void)
334 {
335 gmap_unregister_pte_notifier(&gmap_notifier);
336 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
337 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
338 &kvm_clock_notifier);
339 }
340
341 static void allow_cpu_feat(unsigned long nr)
342 {
343 set_bit_inv(nr, kvm_s390_available_cpu_feat);
344 }
345
346 static inline int plo_test_bit(unsigned char nr)
347 {
348 unsigned long function = (unsigned long)nr | 0x100;
349 int cc;
350
351 asm volatile(
352 " lgr 0,%[function]\n"
353 /* Parameter registers are ignored for "test bit" */
354 " plo 0,0,0,0(0)\n"
355 " ipm %0\n"
356 " srl %0,28\n"
357 : "=d" (cc)
358 : [function] "d" (function)
359 : "cc", "0");
360 return cc == 0;
361 }
362
363 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
364 {
365 asm volatile(
366 " lghi 0,0\n"
367 " lgr 1,%[query]\n"
368 /* Parameter registers are ignored */
369 " .insn rrf,%[opc] << 16,2,4,6,0\n"
370 :
371 : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
372 : "cc", "memory", "0", "1");
373 }
374
375 #define INSN_SORTL 0xb938
376 #define INSN_DFLTCC 0xb939
377
378 static void kvm_s390_cpu_feat_init(void)
379 {
380 int i;
381
382 for (i = 0; i < 256; ++i) {
383 if (plo_test_bit(i))
384 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
385 }
386
387 if (test_facility(28)) /* TOD-clock steering */
388 ptff(kvm_s390_available_subfunc.ptff,
389 sizeof(kvm_s390_available_subfunc.ptff),
390 PTFF_QAF);
391
392 if (test_facility(17)) { /* MSA */
393 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
394 kvm_s390_available_subfunc.kmac);
395 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
396 kvm_s390_available_subfunc.kmc);
397 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
398 kvm_s390_available_subfunc.km);
399 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
400 kvm_s390_available_subfunc.kimd);
401 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
402 kvm_s390_available_subfunc.klmd);
403 }
404 if (test_facility(76)) /* MSA3 */
405 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
406 kvm_s390_available_subfunc.pckmo);
407 if (test_facility(77)) { /* MSA4 */
408 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
409 kvm_s390_available_subfunc.kmctr);
410 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
411 kvm_s390_available_subfunc.kmf);
412 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
413 kvm_s390_available_subfunc.kmo);
414 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
415 kvm_s390_available_subfunc.pcc);
416 }
417 if (test_facility(57)) /* MSA5 */
418 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
419 kvm_s390_available_subfunc.ppno);
420
421 if (test_facility(146)) /* MSA8 */
422 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
423 kvm_s390_available_subfunc.kma);
424
425 if (test_facility(155)) /* MSA9 */
426 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
427 kvm_s390_available_subfunc.kdsa);
428
429 if (test_facility(150)) /* SORTL */
430 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
431
432 if (test_facility(151)) /* DFLTCC */
433 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
434
435 if (MACHINE_HAS_ESOP)
436 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
437 /*
438 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
439 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
440 */
441 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
442 !test_facility(3) || !nested)
443 return;
444 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
445 if (sclp.has_64bscao)
446 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
447 if (sclp.has_siif)
448 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
449 if (sclp.has_gpere)
450 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
451 if (sclp.has_gsls)
452 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
453 if (sclp.has_ib)
454 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
455 if (sclp.has_cei)
456 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
457 if (sclp.has_ibs)
458 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
459 if (sclp.has_kss)
460 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
461 /*
462 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
463 * all skey handling functions read/set the skey from the PGSTE
464 * instead of the real storage key.
465 *
466 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
467 * pages being detected as preserved although they are resident.
468 *
469 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
470 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
471 *
472 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
473 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
474 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
475 *
476 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
477 * cannot easily shadow the SCA because of the ipte lock.
478 */
479 }
480
481 int kvm_arch_init(void *opaque)
482 {
483 int rc = -ENOMEM;
484
485 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
486 if (!kvm_s390_dbf)
487 return -ENOMEM;
488
489 kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
490 if (!kvm_s390_dbf_uv)
491 goto out;
492
493 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
494 debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
495 goto out;
496
497 kvm_s390_cpu_feat_init();
498
499 /* Register floating interrupt controller interface. */
500 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
501 if (rc) {
502 pr_err("A FLIC registration call failed with rc=%d\n", rc);
503 goto out;
504 }
505
506 rc = kvm_s390_gib_init(GAL_ISC);
507 if (rc)
508 goto out;
509
510 return 0;
511
512 out:
513 kvm_arch_exit();
514 return rc;
515 }
516
517 void kvm_arch_exit(void)
518 {
519 kvm_s390_gib_destroy();
520 debug_unregister(kvm_s390_dbf);
521 debug_unregister(kvm_s390_dbf_uv);
522 }
523
524 /* Section: device related */
525 long kvm_arch_dev_ioctl(struct file *filp,
526 unsigned int ioctl, unsigned long arg)
527 {
528 if (ioctl == KVM_S390_ENABLE_SIE)
529 return s390_enable_sie();
530 return -EINVAL;
531 }
532
533 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
534 {
535 int r;
536
537 switch (ext) {
538 case KVM_CAP_S390_PSW:
539 case KVM_CAP_S390_GMAP:
540 case KVM_CAP_SYNC_MMU:
541 #ifdef CONFIG_KVM_S390_UCONTROL
542 case KVM_CAP_S390_UCONTROL:
543 #endif
544 case KVM_CAP_ASYNC_PF:
545 case KVM_CAP_SYNC_REGS:
546 case KVM_CAP_ONE_REG:
547 case KVM_CAP_ENABLE_CAP:
548 case KVM_CAP_S390_CSS_SUPPORT:
549 case KVM_CAP_IOEVENTFD:
550 case KVM_CAP_DEVICE_CTRL:
551 case KVM_CAP_S390_IRQCHIP:
552 case KVM_CAP_VM_ATTRIBUTES:
553 case KVM_CAP_MP_STATE:
554 case KVM_CAP_IMMEDIATE_EXIT:
555 case KVM_CAP_S390_INJECT_IRQ:
556 case KVM_CAP_S390_USER_SIGP:
557 case KVM_CAP_S390_USER_STSI:
558 case KVM_CAP_S390_SKEYS:
559 case KVM_CAP_S390_IRQ_STATE:
560 case KVM_CAP_S390_USER_INSTR0:
561 case KVM_CAP_S390_CMMA_MIGRATION:
562 case KVM_CAP_S390_AIS:
563 case KVM_CAP_S390_AIS_MIGRATION:
564 case KVM_CAP_S390_VCPU_RESETS:
565 case KVM_CAP_SET_GUEST_DEBUG:
566 case KVM_CAP_S390_DIAG318:
567 case KVM_CAP_S390_MEM_OP_EXTENSION:
568 r = 1;
569 break;
570 case KVM_CAP_SET_GUEST_DEBUG2:
571 r = KVM_GUESTDBG_VALID_MASK;
572 break;
573 case KVM_CAP_S390_HPAGE_1M:
574 r = 0;
575 if (hpage && !kvm_is_ucontrol(kvm))
576 r = 1;
577 break;
578 case KVM_CAP_S390_MEM_OP:
579 r = MEM_OP_MAX_SIZE;
580 break;
581 case KVM_CAP_NR_VCPUS:
582 case KVM_CAP_MAX_VCPUS:
583 case KVM_CAP_MAX_VCPU_ID:
584 r = KVM_S390_BSCA_CPU_SLOTS;
585 if (!kvm_s390_use_sca_entries())
586 r = KVM_MAX_VCPUS;
587 else if (sclp.has_esca && sclp.has_64bscao)
588 r = KVM_S390_ESCA_CPU_SLOTS;
589 break;
590 case KVM_CAP_S390_COW:
591 r = MACHINE_HAS_ESOP;
592 break;
593 case KVM_CAP_S390_VECTOR_REGISTERS:
594 r = MACHINE_HAS_VX;
595 break;
596 case KVM_CAP_S390_RI:
597 r = test_facility(64);
598 break;
599 case KVM_CAP_S390_GS:
600 r = test_facility(133);
601 break;
602 case KVM_CAP_S390_BPB:
603 r = test_facility(82);
604 break;
605 case KVM_CAP_S390_PROTECTED:
606 r = is_prot_virt_host();
607 break;
608 default:
609 r = 0;
610 }
611 return r;
612 }
613
614 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
615 {
616 int i;
617 gfn_t cur_gfn, last_gfn;
618 unsigned long gaddr, vmaddr;
619 struct gmap *gmap = kvm->arch.gmap;
620 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
621
622 /* Loop over all guest segments */
623 cur_gfn = memslot->base_gfn;
624 last_gfn = memslot->base_gfn + memslot->npages;
625 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
626 gaddr = gfn_to_gpa(cur_gfn);
627 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
628 if (kvm_is_error_hva(vmaddr))
629 continue;
630
631 bitmap_zero(bitmap, _PAGE_ENTRIES);
632 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
633 for (i = 0; i < _PAGE_ENTRIES; i++) {
634 if (test_bit(i, bitmap))
635 mark_page_dirty(kvm, cur_gfn + i);
636 }
637
638 if (fatal_signal_pending(current))
639 return;
640 cond_resched();
641 }
642 }
643
644 /* Section: vm related */
645 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
646
647 /*
648 * Get (and clear) the dirty memory log for a memory slot.
649 */
650 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
651 struct kvm_dirty_log *log)
652 {
653 int r;
654 unsigned long n;
655 struct kvm_memory_slot *memslot;
656 int is_dirty;
657
658 if (kvm_is_ucontrol(kvm))
659 return -EINVAL;
660
661 mutex_lock(&kvm->slots_lock);
662
663 r = -EINVAL;
664 if (log->slot >= KVM_USER_MEM_SLOTS)
665 goto out;
666
667 r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
668 if (r)
669 goto out;
670
671 /* Clear the dirty log */
672 if (is_dirty) {
673 n = kvm_dirty_bitmap_bytes(memslot);
674 memset(memslot->dirty_bitmap, 0, n);
675 }
676 r = 0;
677 out:
678 mutex_unlock(&kvm->slots_lock);
679 return r;
680 }
681
682 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
683 {
684 unsigned int i;
685 struct kvm_vcpu *vcpu;
686
687 kvm_for_each_vcpu(i, vcpu, kvm) {
688 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
689 }
690 }
691
692 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
693 {
694 int r;
695
696 if (cap->flags)
697 return -EINVAL;
698
699 switch (cap->cap) {
700 case KVM_CAP_S390_IRQCHIP:
701 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
702 kvm->arch.use_irqchip = 1;
703 r = 0;
704 break;
705 case KVM_CAP_S390_USER_SIGP:
706 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
707 kvm->arch.user_sigp = 1;
708 r = 0;
709 break;
710 case KVM_CAP_S390_VECTOR_REGISTERS:
711 mutex_lock(&kvm->lock);
712 if (kvm->created_vcpus) {
713 r = -EBUSY;
714 } else if (MACHINE_HAS_VX) {
715 set_kvm_facility(kvm->arch.model.fac_mask, 129);
716 set_kvm_facility(kvm->arch.model.fac_list, 129);
717 if (test_facility(134)) {
718 set_kvm_facility(kvm->arch.model.fac_mask, 134);
719 set_kvm_facility(kvm->arch.model.fac_list, 134);
720 }
721 if (test_facility(135)) {
722 set_kvm_facility(kvm->arch.model.fac_mask, 135);
723 set_kvm_facility(kvm->arch.model.fac_list, 135);
724 }
725 if (test_facility(148)) {
726 set_kvm_facility(kvm->arch.model.fac_mask, 148);
727 set_kvm_facility(kvm->arch.model.fac_list, 148);
728 }
729 if (test_facility(152)) {
730 set_kvm_facility(kvm->arch.model.fac_mask, 152);
731 set_kvm_facility(kvm->arch.model.fac_list, 152);
732 }
733 if (test_facility(192)) {
734 set_kvm_facility(kvm->arch.model.fac_mask, 192);
735 set_kvm_facility(kvm->arch.model.fac_list, 192);
736 }
737 r = 0;
738 } else
739 r = -EINVAL;
740 mutex_unlock(&kvm->lock);
741 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
742 r ? "(not available)" : "(success)");
743 break;
744 case KVM_CAP_S390_RI:
745 r = -EINVAL;
746 mutex_lock(&kvm->lock);
747 if (kvm->created_vcpus) {
748 r = -EBUSY;
749 } else if (test_facility(64)) {
750 set_kvm_facility(kvm->arch.model.fac_mask, 64);
751 set_kvm_facility(kvm->arch.model.fac_list, 64);
752 r = 0;
753 }
754 mutex_unlock(&kvm->lock);
755 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
756 r ? "(not available)" : "(success)");
757 break;
758 case KVM_CAP_S390_AIS:
759 mutex_lock(&kvm->lock);
760 if (kvm->created_vcpus) {
761 r = -EBUSY;
762 } else {
763 set_kvm_facility(kvm->arch.model.fac_mask, 72);
764 set_kvm_facility(kvm->arch.model.fac_list, 72);
765 r = 0;
766 }
767 mutex_unlock(&kvm->lock);
768 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
769 r ? "(not available)" : "(success)");
770 break;
771 case KVM_CAP_S390_GS:
772 r = -EINVAL;
773 mutex_lock(&kvm->lock);
774 if (kvm->created_vcpus) {
775 r = -EBUSY;
776 } else if (test_facility(133)) {
777 set_kvm_facility(kvm->arch.model.fac_mask, 133);
778 set_kvm_facility(kvm->arch.model.fac_list, 133);
779 r = 0;
780 }
781 mutex_unlock(&kvm->lock);
782 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
783 r ? "(not available)" : "(success)");
784 break;
785 case KVM_CAP_S390_HPAGE_1M:
786 mutex_lock(&kvm->lock);
787 if (kvm->created_vcpus)
788 r = -EBUSY;
789 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
790 r = -EINVAL;
791 else {
792 r = 0;
793 mmap_write_lock(kvm->mm);
794 kvm->mm->context.allow_gmap_hpage_1m = 1;
795 mmap_write_unlock(kvm->mm);
796 /*
797 * We might have to create fake 4k page
798 * tables. To avoid that the hardware works on
799 * stale PGSTEs, we emulate these instructions.
800 */
801 kvm->arch.use_skf = 0;
802 kvm->arch.use_pfmfi = 0;
803 }
804 mutex_unlock(&kvm->lock);
805 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
806 r ? "(not available)" : "(success)");
807 break;
808 case KVM_CAP_S390_USER_STSI:
809 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
810 kvm->arch.user_stsi = 1;
811 r = 0;
812 break;
813 case KVM_CAP_S390_USER_INSTR0:
814 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
815 kvm->arch.user_instr0 = 1;
816 icpt_operexc_on_all_vcpus(kvm);
817 r = 0;
818 break;
819 default:
820 r = -EINVAL;
821 break;
822 }
823 return r;
824 }
825
826 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
827 {
828 int ret;
829
830 switch (attr->attr) {
831 case KVM_S390_VM_MEM_LIMIT_SIZE:
832 ret = 0;
833 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
834 kvm->arch.mem_limit);
835 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
836 ret = -EFAULT;
837 break;
838 default:
839 ret = -ENXIO;
840 break;
841 }
842 return ret;
843 }
844
845 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
846 {
847 int ret;
848 unsigned int idx;
849 switch (attr->attr) {
850 case KVM_S390_VM_MEM_ENABLE_CMMA:
851 ret = -ENXIO;
852 if (!sclp.has_cmma)
853 break;
854
855 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
856 mutex_lock(&kvm->lock);
857 if (kvm->created_vcpus)
858 ret = -EBUSY;
859 else if (kvm->mm->context.allow_gmap_hpage_1m)
860 ret = -EINVAL;
861 else {
862 kvm->arch.use_cmma = 1;
863 /* Not compatible with cmma. */
864 kvm->arch.use_pfmfi = 0;
865 ret = 0;
866 }
867 mutex_unlock(&kvm->lock);
868 break;
869 case KVM_S390_VM_MEM_CLR_CMMA:
870 ret = -ENXIO;
871 if (!sclp.has_cmma)
872 break;
873 ret = -EINVAL;
874 if (!kvm->arch.use_cmma)
875 break;
876
877 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
878 mutex_lock(&kvm->lock);
879 idx = srcu_read_lock(&kvm->srcu);
880 s390_reset_cmma(kvm->arch.gmap->mm);
881 srcu_read_unlock(&kvm->srcu, idx);
882 mutex_unlock(&kvm->lock);
883 ret = 0;
884 break;
885 case KVM_S390_VM_MEM_LIMIT_SIZE: {
886 unsigned long new_limit;
887
888 if (kvm_is_ucontrol(kvm))
889 return -EINVAL;
890
891 if (get_user(new_limit, (u64 __user *)attr->addr))
892 return -EFAULT;
893
894 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
895 new_limit > kvm->arch.mem_limit)
896 return -E2BIG;
897
898 if (!new_limit)
899 return -EINVAL;
900
901 /* gmap_create takes last usable address */
902 if (new_limit != KVM_S390_NO_MEM_LIMIT)
903 new_limit -= 1;
904
905 ret = -EBUSY;
906 mutex_lock(&kvm->lock);
907 if (!kvm->created_vcpus) {
908 /* gmap_create will round the limit up */
909 struct gmap *new = gmap_create(current->mm, new_limit);
910
911 if (!new) {
912 ret = -ENOMEM;
913 } else {
914 gmap_remove(kvm->arch.gmap);
915 new->private = kvm;
916 kvm->arch.gmap = new;
917 ret = 0;
918 }
919 }
920 mutex_unlock(&kvm->lock);
921 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
922 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
923 (void *) kvm->arch.gmap->asce);
924 break;
925 }
926 default:
927 ret = -ENXIO;
928 break;
929 }
930 return ret;
931 }
932
933 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
934
935 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
936 {
937 struct kvm_vcpu *vcpu;
938 int i;
939
940 kvm_s390_vcpu_block_all(kvm);
941
942 kvm_for_each_vcpu(i, vcpu, kvm) {
943 kvm_s390_vcpu_crypto_setup(vcpu);
944 /* recreate the shadow crycb by leaving the VSIE handler */
945 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
946 }
947
948 kvm_s390_vcpu_unblock_all(kvm);
949 }
950
951 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
952 {
953 mutex_lock(&kvm->lock);
954 switch (attr->attr) {
955 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
956 if (!test_kvm_facility(kvm, 76)) {
957 mutex_unlock(&kvm->lock);
958 return -EINVAL;
959 }
960 get_random_bytes(
961 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
962 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
963 kvm->arch.crypto.aes_kw = 1;
964 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
965 break;
966 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
967 if (!test_kvm_facility(kvm, 76)) {
968 mutex_unlock(&kvm->lock);
969 return -EINVAL;
970 }
971 get_random_bytes(
972 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
973 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
974 kvm->arch.crypto.dea_kw = 1;
975 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
976 break;
977 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
978 if (!test_kvm_facility(kvm, 76)) {
979 mutex_unlock(&kvm->lock);
980 return -EINVAL;
981 }
982 kvm->arch.crypto.aes_kw = 0;
983 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
984 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
985 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
986 break;
987 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
988 if (!test_kvm_facility(kvm, 76)) {
989 mutex_unlock(&kvm->lock);
990 return -EINVAL;
991 }
992 kvm->arch.crypto.dea_kw = 0;
993 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
994 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
995 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
996 break;
997 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
998 if (!ap_instructions_available()) {
999 mutex_unlock(&kvm->lock);
1000 return -EOPNOTSUPP;
1001 }
1002 kvm->arch.crypto.apie = 1;
1003 break;
1004 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1005 if (!ap_instructions_available()) {
1006 mutex_unlock(&kvm->lock);
1007 return -EOPNOTSUPP;
1008 }
1009 kvm->arch.crypto.apie = 0;
1010 break;
1011 default:
1012 mutex_unlock(&kvm->lock);
1013 return -ENXIO;
1014 }
1015
1016 kvm_s390_vcpu_crypto_reset_all(kvm);
1017 mutex_unlock(&kvm->lock);
1018 return 0;
1019 }
1020
1021 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1022 {
1023 int cx;
1024 struct kvm_vcpu *vcpu;
1025
1026 kvm_for_each_vcpu(cx, vcpu, kvm)
1027 kvm_s390_sync_request(req, vcpu);
1028 }
1029
1030 /*
1031 * Must be called with kvm->srcu held to avoid races on memslots, and with
1032 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1033 */
1034 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1035 {
1036 struct kvm_memory_slot *ms;
1037 struct kvm_memslots *slots;
1038 unsigned long ram_pages = 0;
1039 int slotnr;
1040
1041 /* migration mode already enabled */
1042 if (kvm->arch.migration_mode)
1043 return 0;
1044 slots = kvm_memslots(kvm);
1045 if (!slots || !slots->used_slots)
1046 return -EINVAL;
1047
1048 if (!kvm->arch.use_cmma) {
1049 kvm->arch.migration_mode = 1;
1050 return 0;
1051 }
1052 /* mark all the pages in active slots as dirty */
1053 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1054 ms = slots->memslots + slotnr;
1055 if (!ms->dirty_bitmap)
1056 return -EINVAL;
1057 /*
1058 * The second half of the bitmap is only used on x86,
1059 * and would be wasted otherwise, so we put it to good
1060 * use here to keep track of the state of the storage
1061 * attributes.
1062 */
1063 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1064 ram_pages += ms->npages;
1065 }
1066 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1067 kvm->arch.migration_mode = 1;
1068 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1069 return 0;
1070 }
1071
1072 /*
1073 * Must be called with kvm->slots_lock to avoid races with ourselves and
1074 * kvm_s390_vm_start_migration.
1075 */
1076 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1077 {
1078 /* migration mode already disabled */
1079 if (!kvm->arch.migration_mode)
1080 return 0;
1081 kvm->arch.migration_mode = 0;
1082 if (kvm->arch.use_cmma)
1083 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1084 return 0;
1085 }
1086
1087 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1088 struct kvm_device_attr *attr)
1089 {
1090 int res = -ENXIO;
1091
1092 mutex_lock(&kvm->slots_lock);
1093 switch (attr->attr) {
1094 case KVM_S390_VM_MIGRATION_START:
1095 res = kvm_s390_vm_start_migration(kvm);
1096 break;
1097 case KVM_S390_VM_MIGRATION_STOP:
1098 res = kvm_s390_vm_stop_migration(kvm);
1099 break;
1100 default:
1101 break;
1102 }
1103 mutex_unlock(&kvm->slots_lock);
1104
1105 return res;
1106 }
1107
1108 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1109 struct kvm_device_attr *attr)
1110 {
1111 u64 mig = kvm->arch.migration_mode;
1112
1113 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1114 return -ENXIO;
1115
1116 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1117 return -EFAULT;
1118 return 0;
1119 }
1120
1121 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1122 {
1123 struct kvm_s390_vm_tod_clock gtod;
1124
1125 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1126 return -EFAULT;
1127
1128 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1129 return -EINVAL;
1130 kvm_s390_set_tod_clock(kvm, &gtod);
1131
1132 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1133 gtod.epoch_idx, gtod.tod);
1134
1135 return 0;
1136 }
1137
1138 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1139 {
1140 u8 gtod_high;
1141
1142 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1143 sizeof(gtod_high)))
1144 return -EFAULT;
1145
1146 if (gtod_high != 0)
1147 return -EINVAL;
1148 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1149
1150 return 0;
1151 }
1152
1153 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1154 {
1155 struct kvm_s390_vm_tod_clock gtod = { 0 };
1156
1157 if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1158 sizeof(gtod.tod)))
1159 return -EFAULT;
1160
1161 kvm_s390_set_tod_clock(kvm, &gtod);
1162 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1163 return 0;
1164 }
1165
1166 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1167 {
1168 int ret;
1169
1170 if (attr->flags)
1171 return -EINVAL;
1172
1173 switch (attr->attr) {
1174 case KVM_S390_VM_TOD_EXT:
1175 ret = kvm_s390_set_tod_ext(kvm, attr);
1176 break;
1177 case KVM_S390_VM_TOD_HIGH:
1178 ret = kvm_s390_set_tod_high(kvm, attr);
1179 break;
1180 case KVM_S390_VM_TOD_LOW:
1181 ret = kvm_s390_set_tod_low(kvm, attr);
1182 break;
1183 default:
1184 ret = -ENXIO;
1185 break;
1186 }
1187 return ret;
1188 }
1189
1190 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1191 struct kvm_s390_vm_tod_clock *gtod)
1192 {
1193 union tod_clock clk;
1194
1195 preempt_disable();
1196
1197 store_tod_clock_ext(&clk);
1198
1199 gtod->tod = clk.tod + kvm->arch.epoch;
1200 gtod->epoch_idx = 0;
1201 if (test_kvm_facility(kvm, 139)) {
1202 gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1203 if (gtod->tod < clk.tod)
1204 gtod->epoch_idx += 1;
1205 }
1206
1207 preempt_enable();
1208 }
1209
1210 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1211 {
1212 struct kvm_s390_vm_tod_clock gtod;
1213
1214 memset(&gtod, 0, sizeof(gtod));
1215 kvm_s390_get_tod_clock(kvm, &gtod);
1216 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1217 return -EFAULT;
1218
1219 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1220 gtod.epoch_idx, gtod.tod);
1221 return 0;
1222 }
1223
1224 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1225 {
1226 u8 gtod_high = 0;
1227
1228 if (copy_to_user((void __user *)attr->addr, &gtod_high,
1229 sizeof(gtod_high)))
1230 return -EFAULT;
1231 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1232
1233 return 0;
1234 }
1235
1236 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1237 {
1238 u64 gtod;
1239
1240 gtod = kvm_s390_get_tod_clock_fast(kvm);
1241 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1242 return -EFAULT;
1243 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1244
1245 return 0;
1246 }
1247
1248 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1249 {
1250 int ret;
1251
1252 if (attr->flags)
1253 return -EINVAL;
1254
1255 switch (attr->attr) {
1256 case KVM_S390_VM_TOD_EXT:
1257 ret = kvm_s390_get_tod_ext(kvm, attr);
1258 break;
1259 case KVM_S390_VM_TOD_HIGH:
1260 ret = kvm_s390_get_tod_high(kvm, attr);
1261 break;
1262 case KVM_S390_VM_TOD_LOW:
1263 ret = kvm_s390_get_tod_low(kvm, attr);
1264 break;
1265 default:
1266 ret = -ENXIO;
1267 break;
1268 }
1269 return ret;
1270 }
1271
1272 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1273 {
1274 struct kvm_s390_vm_cpu_processor *proc;
1275 u16 lowest_ibc, unblocked_ibc;
1276 int ret = 0;
1277
1278 mutex_lock(&kvm->lock);
1279 if (kvm->created_vcpus) {
1280 ret = -EBUSY;
1281 goto out;
1282 }
1283 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1284 if (!proc) {
1285 ret = -ENOMEM;
1286 goto out;
1287 }
1288 if (!copy_from_user(proc, (void __user *)attr->addr,
1289 sizeof(*proc))) {
1290 kvm->arch.model.cpuid = proc->cpuid;
1291 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1292 unblocked_ibc = sclp.ibc & 0xfff;
1293 if (lowest_ibc && proc->ibc) {
1294 if (proc->ibc > unblocked_ibc)
1295 kvm->arch.model.ibc = unblocked_ibc;
1296 else if (proc->ibc < lowest_ibc)
1297 kvm->arch.model.ibc = lowest_ibc;
1298 else
1299 kvm->arch.model.ibc = proc->ibc;
1300 }
1301 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1302 S390_ARCH_FAC_LIST_SIZE_BYTE);
1303 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1304 kvm->arch.model.ibc,
1305 kvm->arch.model.cpuid);
1306 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1307 kvm->arch.model.fac_list[0],
1308 kvm->arch.model.fac_list[1],
1309 kvm->arch.model.fac_list[2]);
1310 } else
1311 ret = -EFAULT;
1312 kfree(proc);
1313 out:
1314 mutex_unlock(&kvm->lock);
1315 return ret;
1316 }
1317
1318 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1319 struct kvm_device_attr *attr)
1320 {
1321 struct kvm_s390_vm_cpu_feat data;
1322
1323 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1324 return -EFAULT;
1325 if (!bitmap_subset((unsigned long *) data.feat,
1326 kvm_s390_available_cpu_feat,
1327 KVM_S390_VM_CPU_FEAT_NR_BITS))
1328 return -EINVAL;
1329
1330 mutex_lock(&kvm->lock);
1331 if (kvm->created_vcpus) {
1332 mutex_unlock(&kvm->lock);
1333 return -EBUSY;
1334 }
1335 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1336 KVM_S390_VM_CPU_FEAT_NR_BITS);
1337 mutex_unlock(&kvm->lock);
1338 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1339 data.feat[0],
1340 data.feat[1],
1341 data.feat[2]);
1342 return 0;
1343 }
1344
1345 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1346 struct kvm_device_attr *attr)
1347 {
1348 mutex_lock(&kvm->lock);
1349 if (kvm->created_vcpus) {
1350 mutex_unlock(&kvm->lock);
1351 return -EBUSY;
1352 }
1353
1354 if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1355 sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1356 mutex_unlock(&kvm->lock);
1357 return -EFAULT;
1358 }
1359 mutex_unlock(&kvm->lock);
1360
1361 VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1362 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1363 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1364 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1365 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1366 VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1367 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1368 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1369 VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1370 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1371 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1372 VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1373 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1374 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1375 VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
1376 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1377 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1378 VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1379 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1380 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1381 VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1382 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1383 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1384 VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1385 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1386 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1387 VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1388 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1389 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1390 VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1391 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1392 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1393 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1394 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1395 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1396 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1397 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1398 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1399 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1400 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1401 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1402 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1403 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1404 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1405 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1406 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1407 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1408 VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1409 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1410 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1411 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1412 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1413 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1414 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1415 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1416 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1417 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1418
1419 return 0;
1420 }
1421
1422 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1423 {
1424 int ret = -ENXIO;
1425
1426 switch (attr->attr) {
1427 case KVM_S390_VM_CPU_PROCESSOR:
1428 ret = kvm_s390_set_processor(kvm, attr);
1429 break;
1430 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1431 ret = kvm_s390_set_processor_feat(kvm, attr);
1432 break;
1433 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1434 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1435 break;
1436 }
1437 return ret;
1438 }
1439
1440 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1441 {
1442 struct kvm_s390_vm_cpu_processor *proc;
1443 int ret = 0;
1444
1445 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1446 if (!proc) {
1447 ret = -ENOMEM;
1448 goto out;
1449 }
1450 proc->cpuid = kvm->arch.model.cpuid;
1451 proc->ibc = kvm->arch.model.ibc;
1452 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1453 S390_ARCH_FAC_LIST_SIZE_BYTE);
1454 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1455 kvm->arch.model.ibc,
1456 kvm->arch.model.cpuid);
1457 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1458 kvm->arch.model.fac_list[0],
1459 kvm->arch.model.fac_list[1],
1460 kvm->arch.model.fac_list[2]);
1461 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1462 ret = -EFAULT;
1463 kfree(proc);
1464 out:
1465 return ret;
1466 }
1467
1468 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1469 {
1470 struct kvm_s390_vm_cpu_machine *mach;
1471 int ret = 0;
1472
1473 mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1474 if (!mach) {
1475 ret = -ENOMEM;
1476 goto out;
1477 }
1478 get_cpu_id((struct cpuid *) &mach->cpuid);
1479 mach->ibc = sclp.ibc;
1480 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1481 S390_ARCH_FAC_LIST_SIZE_BYTE);
1482 memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1483 sizeof(stfle_fac_list));
1484 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1485 kvm->arch.model.ibc,
1486 kvm->arch.model.cpuid);
1487 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1488 mach->fac_mask[0],
1489 mach->fac_mask[1],
1490 mach->fac_mask[2]);
1491 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1492 mach->fac_list[0],
1493 mach->fac_list[1],
1494 mach->fac_list[2]);
1495 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1496 ret = -EFAULT;
1497 kfree(mach);
1498 out:
1499 return ret;
1500 }
1501
1502 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1503 struct kvm_device_attr *attr)
1504 {
1505 struct kvm_s390_vm_cpu_feat data;
1506
1507 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1508 KVM_S390_VM_CPU_FEAT_NR_BITS);
1509 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1510 return -EFAULT;
1511 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1512 data.feat[0],
1513 data.feat[1],
1514 data.feat[2]);
1515 return 0;
1516 }
1517
1518 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1519 struct kvm_device_attr *attr)
1520 {
1521 struct kvm_s390_vm_cpu_feat data;
1522
1523 bitmap_copy((unsigned long *) data.feat,
1524 kvm_s390_available_cpu_feat,
1525 KVM_S390_VM_CPU_FEAT_NR_BITS);
1526 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1527 return -EFAULT;
1528 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1529 data.feat[0],
1530 data.feat[1],
1531 data.feat[2]);
1532 return 0;
1533 }
1534
1535 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1536 struct kvm_device_attr *attr)
1537 {
1538 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1539 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1540 return -EFAULT;
1541
1542 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1543 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1544 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1545 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1546 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1547 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1548 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1549 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1550 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1551 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1552 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1553 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1554 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1555 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1556 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
1557 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1558 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1559 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1560 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1561 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1562 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1563 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1564 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1565 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1566 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1567 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1568 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1569 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1570 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1571 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1572 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1573 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1574 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1575 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1576 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1577 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1578 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1579 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1580 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1581 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1582 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1583 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1584 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1585 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1586 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1587 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1588 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1589 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1590 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1591 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1592 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1593 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1594 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1595 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1596 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1597 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1598 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1599
1600 return 0;
1601 }
1602
1603 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1604 struct kvm_device_attr *attr)
1605 {
1606 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1607 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1608 return -EFAULT;
1609
1610 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1611 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1612 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1613 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1614 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1615 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
1616 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1617 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1618 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
1619 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1620 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1621 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
1622 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1623 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1624 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
1625 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1626 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1627 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
1628 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1629 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1630 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
1631 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1632 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1633 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
1634 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1635 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1636 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
1637 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1638 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1639 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
1640 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1641 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1642 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
1643 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1644 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1645 VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
1646 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1647 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1648 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
1649 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1650 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1651 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
1652 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1653 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1654 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
1655 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1656 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1657 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1658 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1659 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1660 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1661 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1662 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1663 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1664 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1665 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1666 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1667
1668 return 0;
1669 }
1670
1671 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1672 {
1673 int ret = -ENXIO;
1674
1675 switch (attr->attr) {
1676 case KVM_S390_VM_CPU_PROCESSOR:
1677 ret = kvm_s390_get_processor(kvm, attr);
1678 break;
1679 case KVM_S390_VM_CPU_MACHINE:
1680 ret = kvm_s390_get_machine(kvm, attr);
1681 break;
1682 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1683 ret = kvm_s390_get_processor_feat(kvm, attr);
1684 break;
1685 case KVM_S390_VM_CPU_MACHINE_FEAT:
1686 ret = kvm_s390_get_machine_feat(kvm, attr);
1687 break;
1688 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1689 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1690 break;
1691 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1692 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1693 break;
1694 }
1695 return ret;
1696 }
1697
1698 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1699 {
1700 int ret;
1701
1702 switch (attr->group) {
1703 case KVM_S390_VM_MEM_CTRL:
1704 ret = kvm_s390_set_mem_control(kvm, attr);
1705 break;
1706 case KVM_S390_VM_TOD:
1707 ret = kvm_s390_set_tod(kvm, attr);
1708 break;
1709 case KVM_S390_VM_CPU_MODEL:
1710 ret = kvm_s390_set_cpu_model(kvm, attr);
1711 break;
1712 case KVM_S390_VM_CRYPTO:
1713 ret = kvm_s390_vm_set_crypto(kvm, attr);
1714 break;
1715 case KVM_S390_VM_MIGRATION:
1716 ret = kvm_s390_vm_set_migration(kvm, attr);
1717 break;
1718 default:
1719 ret = -ENXIO;
1720 break;
1721 }
1722
1723 return ret;
1724 }
1725
1726 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1727 {
1728 int ret;
1729
1730 switch (attr->group) {
1731 case KVM_S390_VM_MEM_CTRL:
1732 ret = kvm_s390_get_mem_control(kvm, attr);
1733 break;
1734 case KVM_S390_VM_TOD:
1735 ret = kvm_s390_get_tod(kvm, attr);
1736 break;
1737 case KVM_S390_VM_CPU_MODEL:
1738 ret = kvm_s390_get_cpu_model(kvm, attr);
1739 break;
1740 case KVM_S390_VM_MIGRATION:
1741 ret = kvm_s390_vm_get_migration(kvm, attr);
1742 break;
1743 default:
1744 ret = -ENXIO;
1745 break;
1746 }
1747
1748 return ret;
1749 }
1750
1751 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1752 {
1753 int ret;
1754
1755 switch (attr->group) {
1756 case KVM_S390_VM_MEM_CTRL:
1757 switch (attr->attr) {
1758 case KVM_S390_VM_MEM_ENABLE_CMMA:
1759 case KVM_S390_VM_MEM_CLR_CMMA:
1760 ret = sclp.has_cmma ? 0 : -ENXIO;
1761 break;
1762 case KVM_S390_VM_MEM_LIMIT_SIZE:
1763 ret = 0;
1764 break;
1765 default:
1766 ret = -ENXIO;
1767 break;
1768 }
1769 break;
1770 case KVM_S390_VM_TOD:
1771 switch (attr->attr) {
1772 case KVM_S390_VM_TOD_LOW:
1773 case KVM_S390_VM_TOD_HIGH:
1774 ret = 0;
1775 break;
1776 default:
1777 ret = -ENXIO;
1778 break;
1779 }
1780 break;
1781 case KVM_S390_VM_CPU_MODEL:
1782 switch (attr->attr) {
1783 case KVM_S390_VM_CPU_PROCESSOR:
1784 case KVM_S390_VM_CPU_MACHINE:
1785 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1786 case KVM_S390_VM_CPU_MACHINE_FEAT:
1787 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1788 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1789 ret = 0;
1790 break;
1791 default:
1792 ret = -ENXIO;
1793 break;
1794 }
1795 break;
1796 case KVM_S390_VM_CRYPTO:
1797 switch (attr->attr) {
1798 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1799 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1800 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1801 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1802 ret = 0;
1803 break;
1804 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1805 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1806 ret = ap_instructions_available() ? 0 : -ENXIO;
1807 break;
1808 default:
1809 ret = -ENXIO;
1810 break;
1811 }
1812 break;
1813 case KVM_S390_VM_MIGRATION:
1814 ret = 0;
1815 break;
1816 default:
1817 ret = -ENXIO;
1818 break;
1819 }
1820
1821 return ret;
1822 }
1823
1824 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1825 {
1826 uint8_t *keys;
1827 uint64_t hva;
1828 int srcu_idx, i, r = 0;
1829
1830 if (args->flags != 0)
1831 return -EINVAL;
1832
1833 /* Is this guest using storage keys? */
1834 if (!mm_uses_skeys(current->mm))
1835 return KVM_S390_GET_SKEYS_NONE;
1836
1837 /* Enforce sane limit on memory allocation */
1838 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1839 return -EINVAL;
1840
1841 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1842 if (!keys)
1843 return -ENOMEM;
1844
1845 mmap_read_lock(current->mm);
1846 srcu_idx = srcu_read_lock(&kvm->srcu);
1847 for (i = 0; i < args->count; i++) {
1848 hva = gfn_to_hva(kvm, args->start_gfn + i);
1849 if (kvm_is_error_hva(hva)) {
1850 r = -EFAULT;
1851 break;
1852 }
1853
1854 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1855 if (r)
1856 break;
1857 }
1858 srcu_read_unlock(&kvm->srcu, srcu_idx);
1859 mmap_read_unlock(current->mm);
1860
1861 if (!r) {
1862 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1863 sizeof(uint8_t) * args->count);
1864 if (r)
1865 r = -EFAULT;
1866 }
1867
1868 kvfree(keys);
1869 return r;
1870 }
1871
1872 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1873 {
1874 uint8_t *keys;
1875 uint64_t hva;
1876 int srcu_idx, i, r = 0;
1877 bool unlocked;
1878
1879 if (args->flags != 0)
1880 return -EINVAL;
1881
1882 /* Enforce sane limit on memory allocation */
1883 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1884 return -EINVAL;
1885
1886 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1887 if (!keys)
1888 return -ENOMEM;
1889
1890 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1891 sizeof(uint8_t) * args->count);
1892 if (r) {
1893 r = -EFAULT;
1894 goto out;
1895 }
1896
1897 /* Enable storage key handling for the guest */
1898 r = s390_enable_skey();
1899 if (r)
1900 goto out;
1901
1902 i = 0;
1903 mmap_read_lock(current->mm);
1904 srcu_idx = srcu_read_lock(&kvm->srcu);
1905 while (i < args->count) {
1906 unlocked = false;
1907 hva = gfn_to_hva(kvm, args->start_gfn + i);
1908 if (kvm_is_error_hva(hva)) {
1909 r = -EFAULT;
1910 break;
1911 }
1912
1913 /* Lowest order bit is reserved */
1914 if (keys[i] & 0x01) {
1915 r = -EINVAL;
1916 break;
1917 }
1918
1919 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1920 if (r) {
1921 r = fixup_user_fault(current->mm, hva,
1922 FAULT_FLAG_WRITE, &unlocked);
1923 if (r)
1924 break;
1925 }
1926 if (!r)
1927 i++;
1928 }
1929 srcu_read_unlock(&kvm->srcu, srcu_idx);
1930 mmap_read_unlock(current->mm);
1931 out:
1932 kvfree(keys);
1933 return r;
1934 }
1935
1936 /*
1937 * Base address and length must be sent at the start of each block, therefore
1938 * it's cheaper to send some clean data, as long as it's less than the size of
1939 * two longs.
1940 */
1941 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1942 /* for consistency */
1943 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1944
1945 /*
1946 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1947 * address falls in a hole. In that case the index of one of the memslots
1948 * bordering the hole is returned.
1949 */
1950 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1951 {
1952 int start = 0, end = slots->used_slots;
1953 int slot = atomic_read(&slots->last_used_slot);
1954 struct kvm_memory_slot *memslots = slots->memslots;
1955
1956 if (gfn >= memslots[slot].base_gfn &&
1957 gfn < memslots[slot].base_gfn + memslots[slot].npages)
1958 return slot;
1959
1960 while (start < end) {
1961 slot = start + (end - start) / 2;
1962
1963 if (gfn >= memslots[slot].base_gfn)
1964 end = slot;
1965 else
1966 start = slot + 1;
1967 }
1968
1969 if (start >= slots->used_slots)
1970 return slots->used_slots - 1;
1971
1972 if (gfn >= memslots[start].base_gfn &&
1973 gfn < memslots[start].base_gfn + memslots[start].npages) {
1974 atomic_set(&slots->last_used_slot, start);
1975 }
1976
1977 return start;
1978 }
1979
1980 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1981 u8 *res, unsigned long bufsize)
1982 {
1983 unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1984
1985 args->count = 0;
1986 while (args->count < bufsize) {
1987 hva = gfn_to_hva(kvm, cur_gfn);
1988 /*
1989 * We return an error if the first value was invalid, but we
1990 * return successfully if at least one value was copied.
1991 */
1992 if (kvm_is_error_hva(hva))
1993 return args->count ? 0 : -EFAULT;
1994 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1995 pgstev = 0;
1996 res[args->count++] = (pgstev >> 24) & 0x43;
1997 cur_gfn++;
1998 }
1999
2000 return 0;
2001 }
2002
2003 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2004 unsigned long cur_gfn)
2005 {
2006 int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
2007 struct kvm_memory_slot *ms = slots->memslots + slotidx;
2008 unsigned long ofs = cur_gfn - ms->base_gfn;
2009
2010 if (ms->base_gfn + ms->npages <= cur_gfn) {
2011 slotidx--;
2012 /* If we are above the highest slot, wrap around */
2013 if (slotidx < 0)
2014 slotidx = slots->used_slots - 1;
2015
2016 ms = slots->memslots + slotidx;
2017 ofs = 0;
2018 }
2019 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2020 while ((slotidx > 0) && (ofs >= ms->npages)) {
2021 slotidx--;
2022 ms = slots->memslots + slotidx;
2023 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2024 }
2025 return ms->base_gfn + ofs;
2026 }
2027
2028 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2029 u8 *res, unsigned long bufsize)
2030 {
2031 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2032 struct kvm_memslots *slots = kvm_memslots(kvm);
2033 struct kvm_memory_slot *ms;
2034
2035 if (unlikely(!slots->used_slots))
2036 return 0;
2037
2038 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2039 ms = gfn_to_memslot(kvm, cur_gfn);
2040 args->count = 0;
2041 args->start_gfn = cur_gfn;
2042 if (!ms)
2043 return 0;
2044 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2045 mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2046
2047 while (args->count < bufsize) {
2048 hva = gfn_to_hva(kvm, cur_gfn);
2049 if (kvm_is_error_hva(hva))
2050 return 0;
2051 /* Decrement only if we actually flipped the bit to 0 */
2052 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2053 atomic64_dec(&kvm->arch.cmma_dirty_pages);
2054 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2055 pgstev = 0;
2056 /* Save the value */
2057 res[args->count++] = (pgstev >> 24) & 0x43;
2058 /* If the next bit is too far away, stop. */
2059 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2060 return 0;
2061 /* If we reached the previous "next", find the next one */
2062 if (cur_gfn == next_gfn)
2063 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2064 /* Reached the end of memory or of the buffer, stop */
2065 if ((next_gfn >= mem_end) ||
2066 (next_gfn - args->start_gfn >= bufsize))
2067 return 0;
2068 cur_gfn++;
2069 /* Reached the end of the current memslot, take the next one. */
2070 if (cur_gfn - ms->base_gfn >= ms->npages) {
2071 ms = gfn_to_memslot(kvm, cur_gfn);
2072 if (!ms)
2073 return 0;
2074 }
2075 }
2076 return 0;
2077 }
2078
2079 /*
2080 * This function searches for the next page with dirty CMMA attributes, and
2081 * saves the attributes in the buffer up to either the end of the buffer or
2082 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2083 * no trailing clean bytes are saved.
2084 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2085 * output buffer will indicate 0 as length.
2086 */
2087 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2088 struct kvm_s390_cmma_log *args)
2089 {
2090 unsigned long bufsize;
2091 int srcu_idx, peek, ret;
2092 u8 *values;
2093
2094 if (!kvm->arch.use_cmma)
2095 return -ENXIO;
2096 /* Invalid/unsupported flags were specified */
2097 if (args->flags & ~KVM_S390_CMMA_PEEK)
2098 return -EINVAL;
2099 /* Migration mode query, and we are not doing a migration */
2100 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2101 if (!peek && !kvm->arch.migration_mode)
2102 return -EINVAL;
2103 /* CMMA is disabled or was not used, or the buffer has length zero */
2104 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2105 if (!bufsize || !kvm->mm->context.uses_cmm) {
2106 memset(args, 0, sizeof(*args));
2107 return 0;
2108 }
2109 /* We are not peeking, and there are no dirty pages */
2110 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2111 memset(args, 0, sizeof(*args));
2112 return 0;
2113 }
2114
2115 values = vmalloc(bufsize);
2116 if (!values)
2117 return -ENOMEM;
2118
2119 mmap_read_lock(kvm->mm);
2120 srcu_idx = srcu_read_lock(&kvm->srcu);
2121 if (peek)
2122 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2123 else
2124 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2125 srcu_read_unlock(&kvm->srcu, srcu_idx);
2126 mmap_read_unlock(kvm->mm);
2127
2128 if (kvm->arch.migration_mode)
2129 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2130 else
2131 args->remaining = 0;
2132
2133 if (copy_to_user((void __user *)args->values, values, args->count))
2134 ret = -EFAULT;
2135
2136 vfree(values);
2137 return ret;
2138 }
2139
2140 /*
2141 * This function sets the CMMA attributes for the given pages. If the input
2142 * buffer has zero length, no action is taken, otherwise the attributes are
2143 * set and the mm->context.uses_cmm flag is set.
2144 */
2145 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2146 const struct kvm_s390_cmma_log *args)
2147 {
2148 unsigned long hva, mask, pgstev, i;
2149 uint8_t *bits;
2150 int srcu_idx, r = 0;
2151
2152 mask = args->mask;
2153
2154 if (!kvm->arch.use_cmma)
2155 return -ENXIO;
2156 /* invalid/unsupported flags */
2157 if (args->flags != 0)
2158 return -EINVAL;
2159 /* Enforce sane limit on memory allocation */
2160 if (args->count > KVM_S390_CMMA_SIZE_MAX)
2161 return -EINVAL;
2162 /* Nothing to do */
2163 if (args->count == 0)
2164 return 0;
2165
2166 bits = vmalloc(array_size(sizeof(*bits), args->count));
2167 if (!bits)
2168 return -ENOMEM;
2169
2170 r = copy_from_user(bits, (void __user *)args->values, args->count);
2171 if (r) {
2172 r = -EFAULT;
2173 goto out;
2174 }
2175
2176 mmap_read_lock(kvm->mm);
2177 srcu_idx = srcu_read_lock(&kvm->srcu);
2178 for (i = 0; i < args->count; i++) {
2179 hva = gfn_to_hva(kvm, args->start_gfn + i);
2180 if (kvm_is_error_hva(hva)) {
2181 r = -EFAULT;
2182 break;
2183 }
2184
2185 pgstev = bits[i];
2186 pgstev = pgstev << 24;
2187 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2188 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2189 }
2190 srcu_read_unlock(&kvm->srcu, srcu_idx);
2191 mmap_read_unlock(kvm->mm);
2192
2193 if (!kvm->mm->context.uses_cmm) {
2194 mmap_write_lock(kvm->mm);
2195 kvm->mm->context.uses_cmm = 1;
2196 mmap_write_unlock(kvm->mm);
2197 }
2198 out:
2199 vfree(bits);
2200 return r;
2201 }
2202
2203 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2204 {
2205 struct kvm_vcpu *vcpu;
2206 u16 rc, rrc;
2207 int ret = 0;
2208 int i;
2209
2210 /*
2211 * We ignore failures and try to destroy as many CPUs as possible.
2212 * At the same time we must not free the assigned resources when
2213 * this fails, as the ultravisor has still access to that memory.
2214 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2215 * behind.
2216 * We want to return the first failure rc and rrc, though.
2217 */
2218 kvm_for_each_vcpu(i, vcpu, kvm) {
2219 mutex_lock(&vcpu->mutex);
2220 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2221 *rcp = rc;
2222 *rrcp = rrc;
2223 ret = -EIO;
2224 }
2225 mutex_unlock(&vcpu->mutex);
2226 }
2227 /* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2228 if (use_gisa)
2229 kvm_s390_gisa_enable(kvm);
2230 return ret;
2231 }
2232
2233 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2234 {
2235 int i, r = 0;
2236 u16 dummy;
2237
2238 struct kvm_vcpu *vcpu;
2239
2240 /* Disable the GISA if the ultravisor does not support AIV. */
2241 if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
2242 kvm_s390_gisa_disable(kvm);
2243
2244 kvm_for_each_vcpu(i, vcpu, kvm) {
2245 mutex_lock(&vcpu->mutex);
2246 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2247 mutex_unlock(&vcpu->mutex);
2248 if (r)
2249 break;
2250 }
2251 if (r)
2252 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2253 return r;
2254 }
2255
2256 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2257 {
2258 int r = 0;
2259 u16 dummy;
2260 void __user *argp = (void __user *)cmd->data;
2261
2262 switch (cmd->cmd) {
2263 case KVM_PV_ENABLE: {
2264 r = -EINVAL;
2265 if (kvm_s390_pv_is_protected(kvm))
2266 break;
2267
2268 /*
2269 * FMT 4 SIE needs esca. As we never switch back to bsca from
2270 * esca, we need no cleanup in the error cases below
2271 */
2272 r = sca_switch_to_extended(kvm);
2273 if (r)
2274 break;
2275
2276 mmap_write_lock(current->mm);
2277 r = gmap_mark_unmergeable();
2278 mmap_write_unlock(current->mm);
2279 if (r)
2280 break;
2281
2282 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2283 if (r)
2284 break;
2285
2286 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2287 if (r)
2288 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2289
2290 /* we need to block service interrupts from now on */
2291 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2292 break;
2293 }
2294 case KVM_PV_DISABLE: {
2295 r = -EINVAL;
2296 if (!kvm_s390_pv_is_protected(kvm))
2297 break;
2298
2299 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2300 /*
2301 * If a CPU could not be destroyed, destroy VM will also fail.
2302 * There is no point in trying to destroy it. Instead return
2303 * the rc and rrc from the first CPU that failed destroying.
2304 */
2305 if (r)
2306 break;
2307 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2308
2309 /* no need to block service interrupts any more */
2310 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2311 break;
2312 }
2313 case KVM_PV_SET_SEC_PARMS: {
2314 struct kvm_s390_pv_sec_parm parms = {};
2315 void *hdr;
2316
2317 r = -EINVAL;
2318 if (!kvm_s390_pv_is_protected(kvm))
2319 break;
2320
2321 r = -EFAULT;
2322 if (copy_from_user(&parms, argp, sizeof(parms)))
2323 break;
2324
2325 /* Currently restricted to 8KB */
2326 r = -EINVAL;
2327 if (parms.length > PAGE_SIZE * 2)
2328 break;
2329
2330 r = -ENOMEM;
2331 hdr = vmalloc(parms.length);
2332 if (!hdr)
2333 break;
2334
2335 r = -EFAULT;
2336 if (!copy_from_user(hdr, (void __user *)parms.origin,
2337 parms.length))
2338 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2339 &cmd->rc, &cmd->rrc);
2340
2341 vfree(hdr);
2342 break;
2343 }
2344 case KVM_PV_UNPACK: {
2345 struct kvm_s390_pv_unp unp = {};
2346
2347 r = -EINVAL;
2348 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2349 break;
2350
2351 r = -EFAULT;
2352 if (copy_from_user(&unp, argp, sizeof(unp)))
2353 break;
2354
2355 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2356 &cmd->rc, &cmd->rrc);
2357 break;
2358 }
2359 case KVM_PV_VERIFY: {
2360 r = -EINVAL;
2361 if (!kvm_s390_pv_is_protected(kvm))
2362 break;
2363
2364 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2365 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2366 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2367 cmd->rrc);
2368 break;
2369 }
2370 case KVM_PV_PREP_RESET: {
2371 r = -EINVAL;
2372 if (!kvm_s390_pv_is_protected(kvm))
2373 break;
2374
2375 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2376 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2377 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2378 cmd->rc, cmd->rrc);
2379 break;
2380 }
2381 case KVM_PV_UNSHARE_ALL: {
2382 r = -EINVAL;
2383 if (!kvm_s390_pv_is_protected(kvm))
2384 break;
2385
2386 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2387 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2388 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2389 cmd->rc, cmd->rrc);
2390 break;
2391 }
2392 default:
2393 r = -ENOTTY;
2394 }
2395 return r;
2396 }
2397
2398 static bool access_key_invalid(u8 access_key)
2399 {
2400 return access_key > 0xf;
2401 }
2402
2403 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2404 {
2405 void __user *uaddr = (void __user *)mop->buf;
2406 u64 supported_flags;
2407 void *tmpbuf = NULL;
2408 int r, srcu_idx;
2409
2410 supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
2411 | KVM_S390_MEMOP_F_CHECK_ONLY;
2412 if (mop->flags & ~supported_flags || !mop->size)
2413 return -EINVAL;
2414 if (mop->size > MEM_OP_MAX_SIZE)
2415 return -E2BIG;
2416 if (kvm_s390_pv_is_protected(kvm))
2417 return -EINVAL;
2418 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
2419 if (access_key_invalid(mop->key))
2420 return -EINVAL;
2421 } else {
2422 mop->key = 0;
2423 }
2424 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2425 tmpbuf = vmalloc(mop->size);
2426 if (!tmpbuf)
2427 return -ENOMEM;
2428 }
2429
2430 srcu_idx = srcu_read_lock(&kvm->srcu);
2431
2432 if (kvm_is_error_gpa(kvm, mop->gaddr)) {
2433 r = PGM_ADDRESSING;
2434 goto out_unlock;
2435 }
2436
2437 switch (mop->op) {
2438 case KVM_S390_MEMOP_ABSOLUTE_READ: {
2439 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2440 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
2441 } else {
2442 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2443 mop->size, GACC_FETCH, mop->key);
2444 if (r == 0) {
2445 if (copy_to_user(uaddr, tmpbuf, mop->size))
2446 r = -EFAULT;
2447 }
2448 }
2449 break;
2450 }
2451 case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
2452 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2453 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
2454 } else {
2455 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2456 r = -EFAULT;
2457 break;
2458 }
2459 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2460 mop->size, GACC_STORE, mop->key);
2461 }
2462 break;
2463 }
2464 default:
2465 r = -EINVAL;
2466 }
2467
2468 out_unlock:
2469 srcu_read_unlock(&kvm->srcu, srcu_idx);
2470
2471 vfree(tmpbuf);
2472 return r;
2473 }
2474
2475 long kvm_arch_vm_ioctl(struct file *filp,
2476 unsigned int ioctl, unsigned long arg)
2477 {
2478 struct kvm *kvm = filp->private_data;
2479 void __user *argp = (void __user *)arg;
2480 struct kvm_device_attr attr;
2481 int r;
2482
2483 switch (ioctl) {
2484 case KVM_S390_INTERRUPT: {
2485 struct kvm_s390_interrupt s390int;
2486
2487 r = -EFAULT;
2488 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2489 break;
2490 r = kvm_s390_inject_vm(kvm, &s390int);
2491 break;
2492 }
2493 case KVM_CREATE_IRQCHIP: {
2494 struct kvm_irq_routing_entry routing;
2495
2496 r = -EINVAL;
2497 if (kvm->arch.use_irqchip) {
2498 /* Set up dummy routing. */
2499 memset(&routing, 0, sizeof(routing));
2500 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2501 }
2502 break;
2503 }
2504 case KVM_SET_DEVICE_ATTR: {
2505 r = -EFAULT;
2506 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2507 break;
2508 r = kvm_s390_vm_set_attr(kvm, &attr);
2509 break;
2510 }
2511 case KVM_GET_DEVICE_ATTR: {
2512 r = -EFAULT;
2513 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2514 break;
2515 r = kvm_s390_vm_get_attr(kvm, &attr);
2516 break;
2517 }
2518 case KVM_HAS_DEVICE_ATTR: {
2519 r = -EFAULT;
2520 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2521 break;
2522 r = kvm_s390_vm_has_attr(kvm, &attr);
2523 break;
2524 }
2525 case KVM_S390_GET_SKEYS: {
2526 struct kvm_s390_skeys args;
2527
2528 r = -EFAULT;
2529 if (copy_from_user(&args, argp,
2530 sizeof(struct kvm_s390_skeys)))
2531 break;
2532 r = kvm_s390_get_skeys(kvm, &args);
2533 break;
2534 }
2535 case KVM_S390_SET_SKEYS: {
2536 struct kvm_s390_skeys args;
2537
2538 r = -EFAULT;
2539 if (copy_from_user(&args, argp,
2540 sizeof(struct kvm_s390_skeys)))
2541 break;
2542 r = kvm_s390_set_skeys(kvm, &args);
2543 break;
2544 }
2545 case KVM_S390_GET_CMMA_BITS: {
2546 struct kvm_s390_cmma_log args;
2547
2548 r = -EFAULT;
2549 if (copy_from_user(&args, argp, sizeof(args)))
2550 break;
2551 mutex_lock(&kvm->slots_lock);
2552 r = kvm_s390_get_cmma_bits(kvm, &args);
2553 mutex_unlock(&kvm->slots_lock);
2554 if (!r) {
2555 r = copy_to_user(argp, &args, sizeof(args));
2556 if (r)
2557 r = -EFAULT;
2558 }
2559 break;
2560 }
2561 case KVM_S390_SET_CMMA_BITS: {
2562 struct kvm_s390_cmma_log args;
2563
2564 r = -EFAULT;
2565 if (copy_from_user(&args, argp, sizeof(args)))
2566 break;
2567 mutex_lock(&kvm->slots_lock);
2568 r = kvm_s390_set_cmma_bits(kvm, &args);
2569 mutex_unlock(&kvm->slots_lock);
2570 break;
2571 }
2572 case KVM_S390_PV_COMMAND: {
2573 struct kvm_pv_cmd args;
2574
2575 /* protvirt means user cpu state */
2576 kvm_s390_set_user_cpu_state_ctrl(kvm);
2577 r = 0;
2578 if (!is_prot_virt_host()) {
2579 r = -EINVAL;
2580 break;
2581 }
2582 if (copy_from_user(&args, argp, sizeof(args))) {
2583 r = -EFAULT;
2584 break;
2585 }
2586 if (args.flags) {
2587 r = -EINVAL;
2588 break;
2589 }
2590 mutex_lock(&kvm->lock);
2591 r = kvm_s390_handle_pv(kvm, &args);
2592 mutex_unlock(&kvm->lock);
2593 if (copy_to_user(argp, &args, sizeof(args))) {
2594 r = -EFAULT;
2595 break;
2596 }
2597 break;
2598 }
2599 case KVM_S390_MEM_OP: {
2600 struct kvm_s390_mem_op mem_op;
2601
2602 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2603 r = kvm_s390_vm_mem_op(kvm, &mem_op);
2604 else
2605 r = -EFAULT;
2606 break;
2607 }
2608 default:
2609 r = -ENOTTY;
2610 }
2611
2612 return r;
2613 }
2614
2615 static int kvm_s390_apxa_installed(void)
2616 {
2617 struct ap_config_info info;
2618
2619 if (ap_instructions_available()) {
2620 if (ap_qci(&info) == 0)
2621 return info.apxa;
2622 }
2623
2624 return 0;
2625 }
2626
2627 /*
2628 * The format of the crypto control block (CRYCB) is specified in the 3 low
2629 * order bits of the CRYCB designation (CRYCBD) field as follows:
2630 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2631 * AP extended addressing (APXA) facility are installed.
2632 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2633 * Format 2: Both the APXA and MSAX3 facilities are installed
2634 */
2635 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2636 {
2637 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2638
2639 /* Clear the CRYCB format bits - i.e., set format 0 by default */
2640 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2641
2642 /* Check whether MSAX3 is installed */
2643 if (!test_kvm_facility(kvm, 76))
2644 return;
2645
2646 if (kvm_s390_apxa_installed())
2647 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2648 else
2649 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2650 }
2651
2652 /*
2653 * kvm_arch_crypto_set_masks
2654 *
2655 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2656 * to be set.
2657 * @apm: the mask identifying the accessible AP adapters
2658 * @aqm: the mask identifying the accessible AP domains
2659 * @adm: the mask identifying the accessible AP control domains
2660 *
2661 * Set the masks that identify the adapters, domains and control domains to
2662 * which the KVM guest is granted access.
2663 *
2664 * Note: The kvm->lock mutex must be locked by the caller before invoking this
2665 * function.
2666 */
2667 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2668 unsigned long *aqm, unsigned long *adm)
2669 {
2670 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2671
2672 kvm_s390_vcpu_block_all(kvm);
2673
2674 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2675 case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2676 memcpy(crycb->apcb1.apm, apm, 32);
2677 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2678 apm[0], apm[1], apm[2], apm[3]);
2679 memcpy(crycb->apcb1.aqm, aqm, 32);
2680 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2681 aqm[0], aqm[1], aqm[2], aqm[3]);
2682 memcpy(crycb->apcb1.adm, adm, 32);
2683 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2684 adm[0], adm[1], adm[2], adm[3]);
2685 break;
2686 case CRYCB_FORMAT1:
2687 case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2688 memcpy(crycb->apcb0.apm, apm, 8);
2689 memcpy(crycb->apcb0.aqm, aqm, 2);
2690 memcpy(crycb->apcb0.adm, adm, 2);
2691 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2692 apm[0], *((unsigned short *)aqm),
2693 *((unsigned short *)adm));
2694 break;
2695 default: /* Can not happen */
2696 break;
2697 }
2698
2699 /* recreate the shadow crycb for each vcpu */
2700 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2701 kvm_s390_vcpu_unblock_all(kvm);
2702 }
2703 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2704
2705 /*
2706 * kvm_arch_crypto_clear_masks
2707 *
2708 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2709 * to be cleared.
2710 *
2711 * Clear the masks that identify the adapters, domains and control domains to
2712 * which the KVM guest is granted access.
2713 *
2714 * Note: The kvm->lock mutex must be locked by the caller before invoking this
2715 * function.
2716 */
2717 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2718 {
2719 kvm_s390_vcpu_block_all(kvm);
2720
2721 memset(&kvm->arch.crypto.crycb->apcb0, 0,
2722 sizeof(kvm->arch.crypto.crycb->apcb0));
2723 memset(&kvm->arch.crypto.crycb->apcb1, 0,
2724 sizeof(kvm->arch.crypto.crycb->apcb1));
2725
2726 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2727 /* recreate the shadow crycb for each vcpu */
2728 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2729 kvm_s390_vcpu_unblock_all(kvm);
2730 }
2731 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2732
2733 static u64 kvm_s390_get_initial_cpuid(void)
2734 {
2735 struct cpuid cpuid;
2736
2737 get_cpu_id(&cpuid);
2738 cpuid.version = 0xff;
2739 return *((u64 *) &cpuid);
2740 }
2741
2742 static void kvm_s390_crypto_init(struct kvm *kvm)
2743 {
2744 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2745 kvm_s390_set_crycb_format(kvm);
2746 init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2747
2748 if (!test_kvm_facility(kvm, 76))
2749 return;
2750
2751 /* Enable AES/DEA protected key functions by default */
2752 kvm->arch.crypto.aes_kw = 1;
2753 kvm->arch.crypto.dea_kw = 1;
2754 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2755 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2756 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2757 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2758 }
2759
2760 static void sca_dispose(struct kvm *kvm)
2761 {
2762 if (kvm->arch.use_esca)
2763 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2764 else
2765 free_page((unsigned long)(kvm->arch.sca));
2766 kvm->arch.sca = NULL;
2767 }
2768
2769 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2770 {
2771 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2772 int i, rc;
2773 char debug_name[16];
2774 static unsigned long sca_offset;
2775
2776 rc = -EINVAL;
2777 #ifdef CONFIG_KVM_S390_UCONTROL
2778 if (type & ~KVM_VM_S390_UCONTROL)
2779 goto out_err;
2780 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2781 goto out_err;
2782 #else
2783 if (type)
2784 goto out_err;
2785 #endif
2786
2787 rc = s390_enable_sie();
2788 if (rc)
2789 goto out_err;
2790
2791 rc = -ENOMEM;
2792
2793 if (!sclp.has_64bscao)
2794 alloc_flags |= GFP_DMA;
2795 rwlock_init(&kvm->arch.sca_lock);
2796 /* start with basic SCA */
2797 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2798 if (!kvm->arch.sca)
2799 goto out_err;
2800 mutex_lock(&kvm_lock);
2801 sca_offset += 16;
2802 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2803 sca_offset = 0;
2804 kvm->arch.sca = (struct bsca_block *)
2805 ((char *) kvm->arch.sca + sca_offset);
2806 mutex_unlock(&kvm_lock);
2807
2808 sprintf(debug_name, "kvm-%u", current->pid);
2809
2810 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2811 if (!kvm->arch.dbf)
2812 goto out_err;
2813
2814 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2815 kvm->arch.sie_page2 =
2816 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2817 if (!kvm->arch.sie_page2)
2818 goto out_err;
2819
2820 kvm->arch.sie_page2->kvm = kvm;
2821 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2822
2823 for (i = 0; i < kvm_s390_fac_size(); i++) {
2824 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2825 (kvm_s390_fac_base[i] |
2826 kvm_s390_fac_ext[i]);
2827 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2828 kvm_s390_fac_base[i];
2829 }
2830 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2831
2832 /* we are always in czam mode - even on pre z14 machines */
2833 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2834 set_kvm_facility(kvm->arch.model.fac_list, 138);
2835 /* we emulate STHYI in kvm */
2836 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2837 set_kvm_facility(kvm->arch.model.fac_list, 74);
2838 if (MACHINE_HAS_TLB_GUEST) {
2839 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2840 set_kvm_facility(kvm->arch.model.fac_list, 147);
2841 }
2842
2843 if (css_general_characteristics.aiv && test_facility(65))
2844 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2845
2846 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2847 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2848
2849 kvm_s390_crypto_init(kvm);
2850
2851 mutex_init(&kvm->arch.float_int.ais_lock);
2852 spin_lock_init(&kvm->arch.float_int.lock);
2853 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2854 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2855 init_waitqueue_head(&kvm->arch.ipte_wq);
2856 mutex_init(&kvm->arch.ipte_mutex);
2857
2858 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2859 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2860
2861 if (type & KVM_VM_S390_UCONTROL) {
2862 kvm->arch.gmap = NULL;
2863 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2864 } else {
2865 if (sclp.hamax == U64_MAX)
2866 kvm->arch.mem_limit = TASK_SIZE_MAX;
2867 else
2868 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2869 sclp.hamax + 1);
2870 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2871 if (!kvm->arch.gmap)
2872 goto out_err;
2873 kvm->arch.gmap->private = kvm;
2874 kvm->arch.gmap->pfault_enabled = 0;
2875 }
2876
2877 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2878 kvm->arch.use_skf = sclp.has_skey;
2879 spin_lock_init(&kvm->arch.start_stop_lock);
2880 kvm_s390_vsie_init(kvm);
2881 if (use_gisa)
2882 kvm_s390_gisa_init(kvm);
2883 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2884
2885 return 0;
2886 out_err:
2887 free_page((unsigned long)kvm->arch.sie_page2);
2888 debug_unregister(kvm->arch.dbf);
2889 sca_dispose(kvm);
2890 KVM_EVENT(3, "creation of vm failed: %d", rc);
2891 return rc;
2892 }
2893
2894 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2895 {
2896 u16 rc, rrc;
2897
2898 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2899 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2900 kvm_s390_clear_local_irqs(vcpu);
2901 kvm_clear_async_pf_completion_queue(vcpu);
2902 if (!kvm_is_ucontrol(vcpu->kvm))
2903 sca_del_vcpu(vcpu);
2904
2905 if (kvm_is_ucontrol(vcpu->kvm))
2906 gmap_remove(vcpu->arch.gmap);
2907
2908 if (vcpu->kvm->arch.use_cmma)
2909 kvm_s390_vcpu_unsetup_cmma(vcpu);
2910 /* We can not hold the vcpu mutex here, we are already dying */
2911 if (kvm_s390_pv_cpu_get_handle(vcpu))
2912 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2913 free_page((unsigned long)(vcpu->arch.sie_block));
2914 }
2915
2916 static void kvm_free_vcpus(struct kvm *kvm)
2917 {
2918 unsigned int i;
2919 struct kvm_vcpu *vcpu;
2920
2921 kvm_for_each_vcpu(i, vcpu, kvm)
2922 kvm_vcpu_destroy(vcpu);
2923
2924 mutex_lock(&kvm->lock);
2925 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2926 kvm->vcpus[i] = NULL;
2927
2928 atomic_set(&kvm->online_vcpus, 0);
2929 mutex_unlock(&kvm->lock);
2930 }
2931
2932 void kvm_arch_destroy_vm(struct kvm *kvm)
2933 {
2934 u16 rc, rrc;
2935
2936 kvm_free_vcpus(kvm);
2937 sca_dispose(kvm);
2938 kvm_s390_gisa_destroy(kvm);
2939 /*
2940 * We are already at the end of life and kvm->lock is not taken.
2941 * This is ok as the file descriptor is closed by now and nobody
2942 * can mess with the pv state. To avoid lockdep_assert_held from
2943 * complaining we do not use kvm_s390_pv_is_protected.
2944 */
2945 if (kvm_s390_pv_get_handle(kvm))
2946 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2947 debug_unregister(kvm->arch.dbf);
2948 free_page((unsigned long)kvm->arch.sie_page2);
2949 if (!kvm_is_ucontrol(kvm))
2950 gmap_remove(kvm->arch.gmap);
2951 kvm_s390_destroy_adapters(kvm);
2952 kvm_s390_clear_float_irqs(kvm);
2953 kvm_s390_vsie_destroy(kvm);
2954 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2955 }
2956
2957 /* Section: vcpu related */
2958 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2959 {
2960 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2961 if (!vcpu->arch.gmap)
2962 return -ENOMEM;
2963 vcpu->arch.gmap->private = vcpu->kvm;
2964
2965 return 0;
2966 }
2967
2968 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2969 {
2970 if (!kvm_s390_use_sca_entries())
2971 return;
2972 read_lock(&vcpu->kvm->arch.sca_lock);
2973 if (vcpu->kvm->arch.use_esca) {
2974 struct esca_block *sca = vcpu->kvm->arch.sca;
2975
2976 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2977 sca->cpu[vcpu->vcpu_id].sda = 0;
2978 } else {
2979 struct bsca_block *sca = vcpu->kvm->arch.sca;
2980
2981 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2982 sca->cpu[vcpu->vcpu_id].sda = 0;
2983 }
2984 read_unlock(&vcpu->kvm->arch.sca_lock);
2985 }
2986
2987 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2988 {
2989 if (!kvm_s390_use_sca_entries()) {
2990 struct bsca_block *sca = vcpu->kvm->arch.sca;
2991
2992 /* we still need the basic sca for the ipte control */
2993 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2994 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2995 return;
2996 }
2997 read_lock(&vcpu->kvm->arch.sca_lock);
2998 if (vcpu->kvm->arch.use_esca) {
2999 struct esca_block *sca = vcpu->kvm->arch.sca;
3000
3001 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
3002 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
3003 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
3004 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3005 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3006 } else {
3007 struct bsca_block *sca = vcpu->kvm->arch.sca;
3008
3009 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
3010 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
3011 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
3012 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3013 }
3014 read_unlock(&vcpu->kvm->arch.sca_lock);
3015 }
3016
3017 /* Basic SCA to Extended SCA data copy routines */
3018 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
3019 {
3020 d->sda = s->sda;
3021 d->sigp_ctrl.c = s->sigp_ctrl.c;
3022 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
3023 }
3024
3025 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
3026 {
3027 int i;
3028
3029 d->ipte_control = s->ipte_control;
3030 d->mcn[0] = s->mcn;
3031 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
3032 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
3033 }
3034
3035 static int sca_switch_to_extended(struct kvm *kvm)
3036 {
3037 struct bsca_block *old_sca = kvm->arch.sca;
3038 struct esca_block *new_sca;
3039 struct kvm_vcpu *vcpu;
3040 unsigned int vcpu_idx;
3041 u32 scaol, scaoh;
3042
3043 if (kvm->arch.use_esca)
3044 return 0;
3045
3046 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
3047 if (!new_sca)
3048 return -ENOMEM;
3049
3050 scaoh = (u32)((u64)(new_sca) >> 32);
3051 scaol = (u32)(u64)(new_sca) & ~0x3fU;
3052
3053 kvm_s390_vcpu_block_all(kvm);
3054 write_lock(&kvm->arch.sca_lock);
3055
3056 sca_copy_b_to_e(new_sca, old_sca);
3057
3058 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
3059 vcpu->arch.sie_block->scaoh = scaoh;
3060 vcpu->arch.sie_block->scaol = scaol;
3061 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3062 }
3063 kvm->arch.sca = new_sca;
3064 kvm->arch.use_esca = 1;
3065
3066 write_unlock(&kvm->arch.sca_lock);
3067 kvm_s390_vcpu_unblock_all(kvm);
3068
3069 free_page((unsigned long)old_sca);
3070
3071 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
3072 old_sca, kvm->arch.sca);
3073 return 0;
3074 }
3075
3076 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3077 {
3078 int rc;
3079
3080 if (!kvm_s390_use_sca_entries()) {
3081 if (id < KVM_MAX_VCPUS)
3082 return true;
3083 return false;
3084 }
3085 if (id < KVM_S390_BSCA_CPU_SLOTS)
3086 return true;
3087 if (!sclp.has_esca || !sclp.has_64bscao)
3088 return false;
3089
3090 mutex_lock(&kvm->lock);
3091 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3092 mutex_unlock(&kvm->lock);
3093
3094 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3095 }
3096
3097 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3098 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3099 {
3100 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3101 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3102 vcpu->arch.cputm_start = get_tod_clock_fast();
3103 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3104 }
3105
3106 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3107 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3108 {
3109 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3110 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3111 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3112 vcpu->arch.cputm_start = 0;
3113 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3114 }
3115
3116 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3117 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3118 {
3119 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3120 vcpu->arch.cputm_enabled = true;
3121 __start_cpu_timer_accounting(vcpu);
3122 }
3123
3124 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3125 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3126 {
3127 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3128 __stop_cpu_timer_accounting(vcpu);
3129 vcpu->arch.cputm_enabled = false;
3130 }
3131
3132 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3133 {
3134 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3135 __enable_cpu_timer_accounting(vcpu);
3136 preempt_enable();
3137 }
3138
3139 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3140 {
3141 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3142 __disable_cpu_timer_accounting(vcpu);
3143 preempt_enable();
3144 }
3145
3146 /* set the cpu timer - may only be called from the VCPU thread itself */
3147 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3148 {
3149 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3150 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3151 if (vcpu->arch.cputm_enabled)
3152 vcpu->arch.cputm_start = get_tod_clock_fast();
3153 vcpu->arch.sie_block->cputm = cputm;
3154 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3155 preempt_enable();
3156 }
3157
3158 /* update and get the cpu timer - can also be called from other VCPU threads */
3159 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3160 {
3161 unsigned int seq;
3162 __u64 value;
3163
3164 if (unlikely(!vcpu->arch.cputm_enabled))
3165 return vcpu->arch.sie_block->cputm;
3166
3167 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3168 do {
3169 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3170 /*
3171 * If the writer would ever execute a read in the critical
3172 * section, e.g. in irq context, we have a deadlock.
3173 */
3174 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3175 value = vcpu->arch.sie_block->cputm;
3176 /* if cputm_start is 0, accounting is being started/stopped */
3177 if (likely(vcpu->arch.cputm_start))
3178 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3179 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3180 preempt_enable();
3181 return value;
3182 }
3183
3184 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3185 {
3186
3187 gmap_enable(vcpu->arch.enabled_gmap);
3188 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3189 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3190 __start_cpu_timer_accounting(vcpu);
3191 vcpu->cpu = cpu;
3192 }
3193
3194 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3195 {
3196 vcpu->cpu = -1;
3197 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3198 __stop_cpu_timer_accounting(vcpu);
3199 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3200 vcpu->arch.enabled_gmap = gmap_get_enabled();
3201 gmap_disable(vcpu->arch.enabled_gmap);
3202
3203 }
3204
3205 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3206 {
3207 mutex_lock(&vcpu->kvm->lock);
3208 preempt_disable();
3209 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3210 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3211 preempt_enable();
3212 mutex_unlock(&vcpu->kvm->lock);
3213 if (!kvm_is_ucontrol(vcpu->kvm)) {
3214 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3215 sca_add_vcpu(vcpu);
3216 }
3217 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3218 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3219 /* make vcpu_load load the right gmap on the first trigger */
3220 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3221 }
3222
3223 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3224 {
3225 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3226 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3227 return true;
3228 return false;
3229 }
3230
3231 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3232 {
3233 /* At least one ECC subfunction must be present */
3234 return kvm_has_pckmo_subfunc(kvm, 32) ||
3235 kvm_has_pckmo_subfunc(kvm, 33) ||
3236 kvm_has_pckmo_subfunc(kvm, 34) ||
3237 kvm_has_pckmo_subfunc(kvm, 40) ||
3238 kvm_has_pckmo_subfunc(kvm, 41);
3239
3240 }
3241
3242 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3243 {
3244 /*
3245 * If the AP instructions are not being interpreted and the MSAX3
3246 * facility is not configured for the guest, there is nothing to set up.
3247 */
3248 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3249 return;
3250
3251 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3252 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3253 vcpu->arch.sie_block->eca &= ~ECA_APIE;
3254 vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3255
3256 if (vcpu->kvm->arch.crypto.apie)
3257 vcpu->arch.sie_block->eca |= ECA_APIE;
3258
3259 /* Set up protected key support */
3260 if (vcpu->kvm->arch.crypto.aes_kw) {
3261 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3262 /* ecc is also wrapped with AES key */
3263 if (kvm_has_pckmo_ecc(vcpu->kvm))
3264 vcpu->arch.sie_block->ecd |= ECD_ECC;
3265 }
3266
3267 if (vcpu->kvm->arch.crypto.dea_kw)
3268 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3269 }
3270
3271 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3272 {
3273 free_page(vcpu->arch.sie_block->cbrlo);
3274 vcpu->arch.sie_block->cbrlo = 0;
3275 }
3276
3277 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3278 {
3279 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3280 if (!vcpu->arch.sie_block->cbrlo)
3281 return -ENOMEM;
3282 return 0;
3283 }
3284
3285 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3286 {
3287 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3288
3289 vcpu->arch.sie_block->ibc = model->ibc;
3290 if (test_kvm_facility(vcpu->kvm, 7))
3291 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3292 }
3293
3294 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3295 {
3296 int rc = 0;
3297 u16 uvrc, uvrrc;
3298
3299 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3300 CPUSTAT_SM |
3301 CPUSTAT_STOPPED);
3302
3303 if (test_kvm_facility(vcpu->kvm, 78))
3304 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3305 else if (test_kvm_facility(vcpu->kvm, 8))
3306 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3307
3308 kvm_s390_vcpu_setup_model(vcpu);
3309
3310 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3311 if (MACHINE_HAS_ESOP)
3312 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3313 if (test_kvm_facility(vcpu->kvm, 9))
3314 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3315 if (test_kvm_facility(vcpu->kvm, 73))
3316 vcpu->arch.sie_block->ecb |= ECB_TE;
3317 if (!kvm_is_ucontrol(vcpu->kvm))
3318 vcpu->arch.sie_block->ecb |= ECB_SPECI;
3319
3320 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3321 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3322 if (test_kvm_facility(vcpu->kvm, 130))
3323 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3324 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3325 if (sclp.has_cei)
3326 vcpu->arch.sie_block->eca |= ECA_CEI;
3327 if (sclp.has_ib)
3328 vcpu->arch.sie_block->eca |= ECA_IB;
3329 if (sclp.has_siif)
3330 vcpu->arch.sie_block->eca |= ECA_SII;
3331 if (sclp.has_sigpif)
3332 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3333 if (test_kvm_facility(vcpu->kvm, 129)) {
3334 vcpu->arch.sie_block->eca |= ECA_VX;
3335 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3336 }
3337 if (test_kvm_facility(vcpu->kvm, 139))
3338 vcpu->arch.sie_block->ecd |= ECD_MEF;
3339 if (test_kvm_facility(vcpu->kvm, 156))
3340 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3341 if (vcpu->arch.sie_block->gd) {
3342 vcpu->arch.sie_block->eca |= ECA_AIV;
3343 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3344 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3345 }
3346 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3347 | SDNXC;
3348 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3349
3350 if (sclp.has_kss)
3351 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3352 else
3353 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3354
3355 if (vcpu->kvm->arch.use_cmma) {
3356 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3357 if (rc)
3358 return rc;
3359 }
3360 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3361 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3362
3363 vcpu->arch.sie_block->hpid = HPID_KVM;
3364
3365 kvm_s390_vcpu_crypto_setup(vcpu);
3366
3367 mutex_lock(&vcpu->kvm->lock);
3368 if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3369 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3370 if (rc)
3371 kvm_s390_vcpu_unsetup_cmma(vcpu);
3372 }
3373 mutex_unlock(&vcpu->kvm->lock);
3374
3375 return rc;
3376 }
3377
3378 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3379 {
3380 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3381 return -EINVAL;
3382 return 0;
3383 }
3384
3385 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3386 {
3387 struct sie_page *sie_page;
3388 int rc;
3389
3390 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3391 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3392 if (!sie_page)
3393 return -ENOMEM;
3394
3395 vcpu->arch.sie_block = &sie_page->sie_block;
3396 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3397
3398 /* the real guest size will always be smaller than msl */
3399 vcpu->arch.sie_block->mso = 0;
3400 vcpu->arch.sie_block->msl = sclp.hamax;
3401
3402 vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3403 spin_lock_init(&vcpu->arch.local_int.lock);
3404 vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
3405 seqcount_init(&vcpu->arch.cputm_seqcount);
3406
3407 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3408 kvm_clear_async_pf_completion_queue(vcpu);
3409 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3410 KVM_SYNC_GPRS |
3411 KVM_SYNC_ACRS |
3412 KVM_SYNC_CRS |
3413 KVM_SYNC_ARCH0 |
3414 KVM_SYNC_PFAULT |
3415 KVM_SYNC_DIAG318;
3416 kvm_s390_set_prefix(vcpu, 0);
3417 if (test_kvm_facility(vcpu->kvm, 64))
3418 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3419 if (test_kvm_facility(vcpu->kvm, 82))
3420 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3421 if (test_kvm_facility(vcpu->kvm, 133))
3422 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3423 if (test_kvm_facility(vcpu->kvm, 156))
3424 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3425 /* fprs can be synchronized via vrs, even if the guest has no vx. With
3426 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3427 */
3428 if (MACHINE_HAS_VX)
3429 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3430 else
3431 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3432
3433 if (kvm_is_ucontrol(vcpu->kvm)) {
3434 rc = __kvm_ucontrol_vcpu_init(vcpu);
3435 if (rc)
3436 goto out_free_sie_block;
3437 }
3438
3439 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3440 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3441 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3442
3443 rc = kvm_s390_vcpu_setup(vcpu);
3444 if (rc)
3445 goto out_ucontrol_uninit;
3446 return 0;
3447
3448 out_ucontrol_uninit:
3449 if (kvm_is_ucontrol(vcpu->kvm))
3450 gmap_remove(vcpu->arch.gmap);
3451 out_free_sie_block:
3452 free_page((unsigned long)(vcpu->arch.sie_block));
3453 return rc;
3454 }
3455
3456 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3457 {
3458 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3459 return kvm_s390_vcpu_has_irq(vcpu, 0);
3460 }
3461
3462 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3463 {
3464 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3465 }
3466
3467 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3468 {
3469 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3470 exit_sie(vcpu);
3471 }
3472
3473 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3474 {
3475 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3476 }
3477
3478 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3479 {
3480 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3481 exit_sie(vcpu);
3482 }
3483
3484 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3485 {
3486 return atomic_read(&vcpu->arch.sie_block->prog20) &
3487 (PROG_BLOCK_SIE | PROG_REQUEST);
3488 }
3489
3490 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3491 {
3492 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3493 }
3494
3495 /*
3496 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3497 * If the CPU is not running (e.g. waiting as idle) the function will
3498 * return immediately. */
3499 void exit_sie(struct kvm_vcpu *vcpu)
3500 {
3501 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3502 kvm_s390_vsie_kick(vcpu);
3503 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3504 cpu_relax();
3505 }
3506
3507 /* Kick a guest cpu out of SIE to process a request synchronously */
3508 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3509 {
3510 kvm_make_request(req, vcpu);
3511 kvm_s390_vcpu_request(vcpu);
3512 }
3513
3514 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3515 unsigned long end)
3516 {
3517 struct kvm *kvm = gmap->private;
3518 struct kvm_vcpu *vcpu;
3519 unsigned long prefix;
3520 int i;
3521
3522 if (gmap_is_shadow(gmap))
3523 return;
3524 if (start >= 1UL << 31)
3525 /* We are only interested in prefix pages */
3526 return;
3527 kvm_for_each_vcpu(i, vcpu, kvm) {
3528 /* match against both prefix pages */
3529 prefix = kvm_s390_get_prefix(vcpu);
3530 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3531 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3532 start, end);
3533 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3534 }
3535 }
3536 }
3537
3538 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3539 {
3540 /* do not poll with more than halt_poll_max_steal percent of steal time */
3541 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3542 READ_ONCE(halt_poll_max_steal)) {
3543 vcpu->stat.halt_no_poll_steal++;
3544 return true;
3545 }
3546 return false;
3547 }
3548
3549 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3550 {
3551 /* kvm common code refers to this, but never calls it */
3552 BUG();
3553 return 0;
3554 }
3555
3556 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3557 struct kvm_one_reg *reg)
3558 {
3559 int r = -EINVAL;
3560
3561 switch (reg->id) {
3562 case KVM_REG_S390_TODPR:
3563 r = put_user(vcpu->arch.sie_block->todpr,
3564 (u32 __user *)reg->addr);
3565 break;
3566 case KVM_REG_S390_EPOCHDIFF:
3567 r = put_user(vcpu->arch.sie_block->epoch,
3568 (u64 __user *)reg->addr);
3569 break;
3570 case KVM_REG_S390_CPU_TIMER:
3571 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3572 (u64 __user *)reg->addr);
3573 break;
3574 case KVM_REG_S390_CLOCK_COMP:
3575 r = put_user(vcpu->arch.sie_block->ckc,
3576 (u64 __user *)reg->addr);
3577 break;
3578 case KVM_REG_S390_PFTOKEN:
3579 r = put_user(vcpu->arch.pfault_token,
3580 (u64 __user *)reg->addr);
3581 break;
3582 case KVM_REG_S390_PFCOMPARE:
3583 r = put_user(vcpu->arch.pfault_compare,
3584 (u64 __user *)reg->addr);
3585 break;
3586 case KVM_REG_S390_PFSELECT:
3587 r = put_user(vcpu->arch.pfault_select,
3588 (u64 __user *)reg->addr);
3589 break;
3590 case KVM_REG_S390_PP:
3591 r = put_user(vcpu->arch.sie_block->pp,
3592 (u64 __user *)reg->addr);
3593 break;
3594 case KVM_REG_S390_GBEA:
3595 r = put_user(vcpu->arch.sie_block->gbea,
3596 (u64 __user *)reg->addr);
3597 break;
3598 default:
3599 break;
3600 }
3601
3602 return r;
3603 }
3604
3605 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3606 struct kvm_one_reg *reg)
3607 {
3608 int r = -EINVAL;
3609 __u64 val;
3610
3611 switch (reg->id) {
3612 case KVM_REG_S390_TODPR:
3613 r = get_user(vcpu->arch.sie_block->todpr,
3614 (u32 __user *)reg->addr);
3615 break;
3616 case KVM_REG_S390_EPOCHDIFF:
3617 r = get_user(vcpu->arch.sie_block->epoch,
3618 (u64 __user *)reg->addr);
3619 break;
3620 case KVM_REG_S390_CPU_TIMER:
3621 r = get_user(val, (u64 __user *)reg->addr);
3622 if (!r)
3623 kvm_s390_set_cpu_timer(vcpu, val);
3624 break;
3625 case KVM_REG_S390_CLOCK_COMP:
3626 r = get_user(vcpu->arch.sie_block->ckc,
3627 (u64 __user *)reg->addr);
3628 break;
3629 case KVM_REG_S390_PFTOKEN:
3630 r = get_user(vcpu->arch.pfault_token,
3631 (u64 __user *)reg->addr);
3632 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3633 kvm_clear_async_pf_completion_queue(vcpu);
3634 break;
3635 case KVM_REG_S390_PFCOMPARE:
3636 r = get_user(vcpu->arch.pfault_compare,
3637 (u64 __user *)reg->addr);
3638 break;
3639 case KVM_REG_S390_PFSELECT:
3640 r = get_user(vcpu->arch.pfault_select,
3641 (u64 __user *)reg->addr);
3642 break;
3643 case KVM_REG_S390_PP:
3644 r = get_user(vcpu->arch.sie_block->pp,
3645 (u64 __user *)reg->addr);
3646 break;
3647 case KVM_REG_S390_GBEA:
3648 r = get_user(vcpu->arch.sie_block->gbea,
3649 (u64 __user *)reg->addr);
3650 break;
3651 default:
3652 break;
3653 }
3654
3655 return r;
3656 }
3657
3658 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3659 {
3660 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3661 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3662 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3663
3664 kvm_clear_async_pf_completion_queue(vcpu);
3665 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3666 kvm_s390_vcpu_stop(vcpu);
3667 kvm_s390_clear_local_irqs(vcpu);
3668 }
3669
3670 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3671 {
3672 /* Initial reset is a superset of the normal reset */
3673 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3674
3675 /*
3676 * This equals initial cpu reset in pop, but we don't switch to ESA.
3677 * We do not only reset the internal data, but also ...
3678 */
3679 vcpu->arch.sie_block->gpsw.mask = 0;
3680 vcpu->arch.sie_block->gpsw.addr = 0;
3681 kvm_s390_set_prefix(vcpu, 0);
3682 kvm_s390_set_cpu_timer(vcpu, 0);
3683 vcpu->arch.sie_block->ckc = 0;
3684 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3685 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3686 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3687
3688 /* ... the data in sync regs */
3689 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3690 vcpu->run->s.regs.ckc = 0;
3691 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3692 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3693 vcpu->run->psw_addr = 0;
3694 vcpu->run->psw_mask = 0;
3695 vcpu->run->s.regs.todpr = 0;
3696 vcpu->run->s.regs.cputm = 0;
3697 vcpu->run->s.regs.ckc = 0;
3698 vcpu->run->s.regs.pp = 0;
3699 vcpu->run->s.regs.gbea = 1;
3700 vcpu->run->s.regs.fpc = 0;
3701 /*
3702 * Do not reset these registers in the protected case, as some of
3703 * them are overlayed and they are not accessible in this case
3704 * anyway.
3705 */
3706 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3707 vcpu->arch.sie_block->gbea = 1;
3708 vcpu->arch.sie_block->pp = 0;
3709 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3710 vcpu->arch.sie_block->todpr = 0;
3711 }
3712 }
3713
3714 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3715 {
3716 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3717
3718 /* Clear reset is a superset of the initial reset */
3719 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3720
3721 memset(&regs->gprs, 0, sizeof(regs->gprs));
3722 memset(&regs->vrs, 0, sizeof(regs->vrs));
3723 memset(&regs->acrs, 0, sizeof(regs->acrs));
3724 memset(&regs->gscb, 0, sizeof(regs->gscb));
3725
3726 regs->etoken = 0;
3727 regs->etoken_extension = 0;
3728 }
3729
3730 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3731 {
3732 vcpu_load(vcpu);
3733 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3734 vcpu_put(vcpu);
3735 return 0;
3736 }
3737
3738 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3739 {
3740 vcpu_load(vcpu);
3741 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3742 vcpu_put(vcpu);
3743 return 0;
3744 }
3745
3746 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3747 struct kvm_sregs *sregs)
3748 {
3749 vcpu_load(vcpu);
3750
3751 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3752 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3753
3754 vcpu_put(vcpu);
3755 return 0;
3756 }
3757
3758 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3759 struct kvm_sregs *sregs)
3760 {
3761 vcpu_load(vcpu);
3762
3763 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3764 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3765
3766 vcpu_put(vcpu);
3767 return 0;
3768 }
3769
3770 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3771 {
3772 int ret = 0;
3773
3774 vcpu_load(vcpu);
3775
3776 if (test_fp_ctl(fpu->fpc)) {
3777 ret = -EINVAL;
3778 goto out;
3779 }
3780 vcpu->run->s.regs.fpc = fpu->fpc;
3781 if (MACHINE_HAS_VX)
3782 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3783 (freg_t *) fpu->fprs);
3784 else
3785 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3786
3787 out:
3788 vcpu_put(vcpu);
3789 return ret;
3790 }
3791
3792 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3793 {
3794 vcpu_load(vcpu);
3795
3796 /* make sure we have the latest values */
3797 save_fpu_regs();
3798 if (MACHINE_HAS_VX)
3799 convert_vx_to_fp((freg_t *) fpu->fprs,
3800 (__vector128 *) vcpu->run->s.regs.vrs);
3801 else
3802 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3803 fpu->fpc = vcpu->run->s.regs.fpc;
3804
3805 vcpu_put(vcpu);
3806 return 0;
3807 }
3808
3809 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3810 {
3811 int rc = 0;
3812
3813 if (!is_vcpu_stopped(vcpu))
3814 rc = -EBUSY;
3815 else {
3816 vcpu->run->psw_mask = psw.mask;
3817 vcpu->run->psw_addr = psw.addr;
3818 }
3819 return rc;
3820 }
3821
3822 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3823 struct kvm_translation *tr)
3824 {
3825 return -EINVAL; /* not implemented yet */
3826 }
3827
3828 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3829 KVM_GUESTDBG_USE_HW_BP | \
3830 KVM_GUESTDBG_ENABLE)
3831
3832 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3833 struct kvm_guest_debug *dbg)
3834 {
3835 int rc = 0;
3836
3837 vcpu_load(vcpu);
3838
3839 vcpu->guest_debug = 0;
3840 kvm_s390_clear_bp_data(vcpu);
3841
3842 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3843 rc = -EINVAL;
3844 goto out;
3845 }
3846 if (!sclp.has_gpere) {
3847 rc = -EINVAL;
3848 goto out;
3849 }
3850
3851 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3852 vcpu->guest_debug = dbg->control;
3853 /* enforce guest PER */
3854 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3855
3856 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3857 rc = kvm_s390_import_bp_data(vcpu, dbg);
3858 } else {
3859 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3860 vcpu->arch.guestdbg.last_bp = 0;
3861 }
3862
3863 if (rc) {
3864 vcpu->guest_debug = 0;
3865 kvm_s390_clear_bp_data(vcpu);
3866 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3867 }
3868
3869 out:
3870 vcpu_put(vcpu);
3871 return rc;
3872 }
3873
3874 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3875 struct kvm_mp_state *mp_state)
3876 {
3877 int ret;
3878
3879 vcpu_load(vcpu);
3880
3881 /* CHECK_STOP and LOAD are not supported yet */
3882 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3883 KVM_MP_STATE_OPERATING;
3884
3885 vcpu_put(vcpu);
3886 return ret;
3887 }
3888
3889 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3890 struct kvm_mp_state *mp_state)
3891 {
3892 int rc = 0;
3893
3894 vcpu_load(vcpu);
3895
3896 /* user space knows about this interface - let it control the state */
3897 kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
3898
3899 switch (mp_state->mp_state) {
3900 case KVM_MP_STATE_STOPPED:
3901 rc = kvm_s390_vcpu_stop(vcpu);
3902 break;
3903 case KVM_MP_STATE_OPERATING:
3904 rc = kvm_s390_vcpu_start(vcpu);
3905 break;
3906 case KVM_MP_STATE_LOAD:
3907 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3908 rc = -ENXIO;
3909 break;
3910 }
3911 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3912 break;
3913 case KVM_MP_STATE_CHECK_STOP:
3914 fallthrough; /* CHECK_STOP and LOAD are not supported yet */
3915 default:
3916 rc = -ENXIO;
3917 }
3918
3919 vcpu_put(vcpu);
3920 return rc;
3921 }
3922
3923 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3924 {
3925 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3926 }
3927
3928 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3929 {
3930 retry:
3931 kvm_s390_vcpu_request_handled(vcpu);
3932 if (!kvm_request_pending(vcpu))
3933 return 0;
3934 /*
3935 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3936 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3937 * This ensures that the ipte instruction for this request has
3938 * already finished. We might race against a second unmapper that
3939 * wants to set the blocking bit. Lets just retry the request loop.
3940 */
3941 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3942 int rc;
3943 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3944 kvm_s390_get_prefix(vcpu),
3945 PAGE_SIZE * 2, PROT_WRITE);
3946 if (rc) {
3947 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3948 return rc;
3949 }
3950 goto retry;
3951 }
3952
3953 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3954 vcpu->arch.sie_block->ihcpu = 0xffff;
3955 goto retry;
3956 }
3957
3958 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3959 if (!ibs_enabled(vcpu)) {
3960 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3961 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3962 }
3963 goto retry;
3964 }
3965
3966 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3967 if (ibs_enabled(vcpu)) {
3968 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3969 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3970 }
3971 goto retry;
3972 }
3973
3974 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3975 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3976 goto retry;
3977 }
3978
3979 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3980 /*
3981 * Disable CMM virtualization; we will emulate the ESSA
3982 * instruction manually, in order to provide additional
3983 * functionalities needed for live migration.
3984 */
3985 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3986 goto retry;
3987 }
3988
3989 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3990 /*
3991 * Re-enable CMM virtualization if CMMA is available and
3992 * CMM has been used.
3993 */
3994 if ((vcpu->kvm->arch.use_cmma) &&
3995 (vcpu->kvm->mm->context.uses_cmm))
3996 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3997 goto retry;
3998 }
3999
4000 /* nothing to do, just clear the request */
4001 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
4002 /* we left the vsie handler, nothing to do, just clear the request */
4003 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
4004
4005 return 0;
4006 }
4007
4008 void kvm_s390_set_tod_clock(struct kvm *kvm,
4009 const struct kvm_s390_vm_tod_clock *gtod)
4010 {
4011 struct kvm_vcpu *vcpu;
4012 union tod_clock clk;
4013 int i;
4014
4015 mutex_lock(&kvm->lock);
4016 preempt_disable();
4017
4018 store_tod_clock_ext(&clk);
4019
4020 kvm->arch.epoch = gtod->tod - clk.tod;
4021 kvm->arch.epdx = 0;
4022 if (test_kvm_facility(kvm, 139)) {
4023 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
4024 if (kvm->arch.epoch > gtod->tod)
4025 kvm->arch.epdx -= 1;
4026 }
4027
4028 kvm_s390_vcpu_block_all(kvm);
4029 kvm_for_each_vcpu(i, vcpu, kvm) {
4030 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
4031 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
4032 }
4033
4034 kvm_s390_vcpu_unblock_all(kvm);
4035 preempt_enable();
4036 mutex_unlock(&kvm->lock);
4037 }
4038
4039 /**
4040 * kvm_arch_fault_in_page - fault-in guest page if necessary
4041 * @vcpu: The corresponding virtual cpu
4042 * @gpa: Guest physical address
4043 * @writable: Whether the page should be writable or not
4044 *
4045 * Make sure that a guest page has been faulted-in on the host.
4046 *
4047 * Return: Zero on success, negative error code otherwise.
4048 */
4049 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
4050 {
4051 return gmap_fault(vcpu->arch.gmap, gpa,
4052 writable ? FAULT_FLAG_WRITE : 0);
4053 }
4054
4055 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
4056 unsigned long token)
4057 {
4058 struct kvm_s390_interrupt inti;
4059 struct kvm_s390_irq irq;
4060
4061 if (start_token) {
4062 irq.u.ext.ext_params2 = token;
4063 irq.type = KVM_S390_INT_PFAULT_INIT;
4064 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
4065 } else {
4066 inti.type = KVM_S390_INT_PFAULT_DONE;
4067 inti.parm64 = token;
4068 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4069 }
4070 }
4071
4072 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4073 struct kvm_async_pf *work)
4074 {
4075 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4076 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4077
4078 return true;
4079 }
4080
4081 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4082 struct kvm_async_pf *work)
4083 {
4084 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4085 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4086 }
4087
4088 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4089 struct kvm_async_pf *work)
4090 {
4091 /* s390 will always inject the page directly */
4092 }
4093
4094 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4095 {
4096 /*
4097 * s390 will always inject the page directly,
4098 * but we still want check_async_completion to cleanup
4099 */
4100 return true;
4101 }
4102
4103 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4104 {
4105 hva_t hva;
4106 struct kvm_arch_async_pf arch;
4107
4108 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4109 return false;
4110 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4111 vcpu->arch.pfault_compare)
4112 return false;
4113 if (psw_extint_disabled(vcpu))
4114 return false;
4115 if (kvm_s390_vcpu_has_irq(vcpu, 0))
4116 return false;
4117 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4118 return false;
4119 if (!vcpu->arch.gmap->pfault_enabled)
4120 return false;
4121
4122 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4123 hva += current->thread.gmap_addr & ~PAGE_MASK;
4124 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4125 return false;
4126
4127 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4128 }
4129
4130 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4131 {
4132 int rc, cpuflags;
4133
4134 /*
4135 * On s390 notifications for arriving pages will be delivered directly
4136 * to the guest but the house keeping for completed pfaults is
4137 * handled outside the worker.
4138 */
4139 kvm_check_async_pf_completion(vcpu);
4140
4141 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4142 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4143
4144 if (need_resched())
4145 schedule();
4146
4147 if (!kvm_is_ucontrol(vcpu->kvm)) {
4148 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4149 if (rc)
4150 return rc;
4151 }
4152
4153 rc = kvm_s390_handle_requests(vcpu);
4154 if (rc)
4155 return rc;
4156
4157 if (guestdbg_enabled(vcpu)) {
4158 kvm_s390_backup_guest_per_regs(vcpu);
4159 kvm_s390_patch_guest_per_regs(vcpu);
4160 }
4161
4162 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4163
4164 vcpu->arch.sie_block->icptcode = 0;
4165 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4166 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4167 trace_kvm_s390_sie_enter(vcpu, cpuflags);
4168
4169 return 0;
4170 }
4171
4172 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4173 {
4174 struct kvm_s390_pgm_info pgm_info = {
4175 .code = PGM_ADDRESSING,
4176 };
4177 u8 opcode, ilen;
4178 int rc;
4179
4180 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4181 trace_kvm_s390_sie_fault(vcpu);
4182
4183 /*
4184 * We want to inject an addressing exception, which is defined as a
4185 * suppressing or terminating exception. However, since we came here
4186 * by a DAT access exception, the PSW still points to the faulting
4187 * instruction since DAT exceptions are nullifying. So we've got
4188 * to look up the current opcode to get the length of the instruction
4189 * to be able to forward the PSW.
4190 */
4191 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4192 ilen = insn_length(opcode);
4193 if (rc < 0) {
4194 return rc;
4195 } else if (rc) {
4196 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4197 * Forward by arbitrary ilc, injection will take care of
4198 * nullification if necessary.
4199 */
4200 pgm_info = vcpu->arch.pgm;
4201 ilen = 4;
4202 }
4203 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4204 kvm_s390_forward_psw(vcpu, ilen);
4205 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4206 }
4207
4208 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4209 {
4210 struct mcck_volatile_info *mcck_info;
4211 struct sie_page *sie_page;
4212
4213 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4214 vcpu->arch.sie_block->icptcode);
4215 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4216
4217 if (guestdbg_enabled(vcpu))
4218 kvm_s390_restore_guest_per_regs(vcpu);
4219
4220 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4221 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4222
4223 if (exit_reason == -EINTR) {
4224 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4225 sie_page = container_of(vcpu->arch.sie_block,
4226 struct sie_page, sie_block);
4227 mcck_info = &sie_page->mcck_info;
4228 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4229 return 0;
4230 }
4231
4232 if (vcpu->arch.sie_block->icptcode > 0) {
4233 int rc = kvm_handle_sie_intercept(vcpu);
4234
4235 if (rc != -EOPNOTSUPP)
4236 return rc;
4237 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4238 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4239 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4240 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4241 return -EREMOTE;
4242 } else if (exit_reason != -EFAULT) {
4243 vcpu->stat.exit_null++;
4244 return 0;
4245 } else if (kvm_is_ucontrol(vcpu->kvm)) {
4246 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4247 vcpu->run->s390_ucontrol.trans_exc_code =
4248 current->thread.gmap_addr;
4249 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4250 return -EREMOTE;
4251 } else if (current->thread.gmap_pfault) {
4252 trace_kvm_s390_major_guest_pfault(vcpu);
4253 current->thread.gmap_pfault = 0;
4254 if (kvm_arch_setup_async_pf(vcpu))
4255 return 0;
4256 vcpu->stat.pfault_sync++;
4257 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4258 }
4259 return vcpu_post_run_fault_in_sie(vcpu);
4260 }
4261
4262 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4263 static int __vcpu_run(struct kvm_vcpu *vcpu)
4264 {
4265 int rc, exit_reason;
4266 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4267
4268 /*
4269 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4270 * ning the guest), so that memslots (and other stuff) are protected
4271 */
4272 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4273
4274 do {
4275 rc = vcpu_pre_run(vcpu);
4276 if (rc)
4277 break;
4278
4279 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4280 /*
4281 * As PF_VCPU will be used in fault handler, between
4282 * guest_enter and guest_exit should be no uaccess.
4283 */
4284 local_irq_disable();
4285 guest_enter_irqoff();
4286 __disable_cpu_timer_accounting(vcpu);
4287 local_irq_enable();
4288 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4289 memcpy(sie_page->pv_grregs,
4290 vcpu->run->s.regs.gprs,
4291 sizeof(sie_page->pv_grregs));
4292 }
4293 if (test_cpu_flag(CIF_FPU))
4294 load_fpu_regs();
4295 exit_reason = sie64a(vcpu->arch.sie_block,
4296 vcpu->run->s.regs.gprs);
4297 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4298 memcpy(vcpu->run->s.regs.gprs,
4299 sie_page->pv_grregs,
4300 sizeof(sie_page->pv_grregs));
4301 /*
4302 * We're not allowed to inject interrupts on intercepts
4303 * that leave the guest state in an "in-between" state
4304 * where the next SIE entry will do a continuation.
4305 * Fence interrupts in our "internal" PSW.
4306 */
4307 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4308 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4309 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4310 }
4311 }
4312 local_irq_disable();
4313 __enable_cpu_timer_accounting(vcpu);
4314 guest_exit_irqoff();
4315 local_irq_enable();
4316 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4317
4318 rc = vcpu_post_run(vcpu, exit_reason);
4319 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4320
4321 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4322 return rc;
4323 }
4324
4325 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4326 {
4327 struct kvm_run *kvm_run = vcpu->run;
4328 struct runtime_instr_cb *riccb;
4329 struct gs_cb *gscb;
4330
4331 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4332 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4333 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4334 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4335 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4336 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4337 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4338 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4339 }
4340 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4341 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4342 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4343 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4344 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4345 kvm_clear_async_pf_completion_queue(vcpu);
4346 }
4347 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4348 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4349 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4350 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
4351 }
4352 /*
4353 * If userspace sets the riccb (e.g. after migration) to a valid state,
4354 * we should enable RI here instead of doing the lazy enablement.
4355 */
4356 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4357 test_kvm_facility(vcpu->kvm, 64) &&
4358 riccb->v &&
4359 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4360 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4361 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4362 }
4363 /*
4364 * If userspace sets the gscb (e.g. after migration) to non-zero,
4365 * we should enable GS here instead of doing the lazy enablement.
4366 */
4367 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4368 test_kvm_facility(vcpu->kvm, 133) &&
4369 gscb->gssm &&
4370 !vcpu->arch.gs_enabled) {
4371 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4372 vcpu->arch.sie_block->ecb |= ECB_GS;
4373 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4374 vcpu->arch.gs_enabled = 1;
4375 }
4376 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4377 test_kvm_facility(vcpu->kvm, 82)) {
4378 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4379 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4380 }
4381 if (MACHINE_HAS_GS) {
4382 preempt_disable();
4383 __ctl_set_bit(2, 4);
4384 if (current->thread.gs_cb) {
4385 vcpu->arch.host_gscb = current->thread.gs_cb;
4386 save_gs_cb(vcpu->arch.host_gscb);
4387 }
4388 if (vcpu->arch.gs_enabled) {
4389 current->thread.gs_cb = (struct gs_cb *)
4390 &vcpu->run->s.regs.gscb;
4391 restore_gs_cb(current->thread.gs_cb);
4392 }
4393 preempt_enable();
4394 }
4395 /* SIE will load etoken directly from SDNX and therefore kvm_run */
4396 }
4397
4398 static void sync_regs(struct kvm_vcpu *vcpu)
4399 {
4400 struct kvm_run *kvm_run = vcpu->run;
4401
4402 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4403 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4404 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4405 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4406 /* some control register changes require a tlb flush */
4407 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4408 }
4409 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4410 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4411 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4412 }
4413 save_access_regs(vcpu->arch.host_acrs);
4414 restore_access_regs(vcpu->run->s.regs.acrs);
4415 /* save host (userspace) fprs/vrs */
4416 save_fpu_regs();
4417 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4418 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4419 if (MACHINE_HAS_VX)
4420 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4421 else
4422 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4423 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4424 if (test_fp_ctl(current->thread.fpu.fpc))
4425 /* User space provided an invalid FPC, let's clear it */
4426 current->thread.fpu.fpc = 0;
4427
4428 /* Sync fmt2 only data */
4429 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4430 sync_regs_fmt2(vcpu);
4431 } else {
4432 /*
4433 * In several places we have to modify our internal view to
4434 * not do things that are disallowed by the ultravisor. For
4435 * example we must not inject interrupts after specific exits
4436 * (e.g. 112 prefix page not secure). We do this by turning
4437 * off the machine check, external and I/O interrupt bits
4438 * of our PSW copy. To avoid getting validity intercepts, we
4439 * do only accept the condition code from userspace.
4440 */
4441 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4442 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4443 PSW_MASK_CC;
4444 }
4445
4446 kvm_run->kvm_dirty_regs = 0;
4447 }
4448
4449 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4450 {
4451 struct kvm_run *kvm_run = vcpu->run;
4452
4453 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4454 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4455 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4456 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4457 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4458 if (MACHINE_HAS_GS) {
4459 preempt_disable();
4460 __ctl_set_bit(2, 4);
4461 if (vcpu->arch.gs_enabled)
4462 save_gs_cb(current->thread.gs_cb);
4463 current->thread.gs_cb = vcpu->arch.host_gscb;
4464 restore_gs_cb(vcpu->arch.host_gscb);
4465 if (!vcpu->arch.host_gscb)
4466 __ctl_clear_bit(2, 4);
4467 vcpu->arch.host_gscb = NULL;
4468 preempt_enable();
4469 }
4470 /* SIE will save etoken directly into SDNX and therefore kvm_run */
4471 }
4472
4473 static void store_regs(struct kvm_vcpu *vcpu)
4474 {
4475 struct kvm_run *kvm_run = vcpu->run;
4476
4477 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4478 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4479 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4480 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4481 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4482 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4483 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4484 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4485 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4486 save_access_regs(vcpu->run->s.regs.acrs);
4487 restore_access_regs(vcpu->arch.host_acrs);
4488 /* Save guest register state */
4489 save_fpu_regs();
4490 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4491 /* Restore will be done lazily at return */
4492 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4493 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4494 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4495 store_regs_fmt2(vcpu);
4496 }
4497
4498 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4499 {
4500 struct kvm_run *kvm_run = vcpu->run;
4501 int rc;
4502
4503 if (kvm_run->immediate_exit)
4504 return -EINTR;
4505
4506 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4507 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4508 return -EINVAL;
4509
4510 vcpu_load(vcpu);
4511
4512 if (guestdbg_exit_pending(vcpu)) {
4513 kvm_s390_prepare_debug_exit(vcpu);
4514 rc = 0;
4515 goto out;
4516 }
4517
4518 kvm_sigset_activate(vcpu);
4519
4520 /*
4521 * no need to check the return value of vcpu_start as it can only have
4522 * an error for protvirt, but protvirt means user cpu state
4523 */
4524 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4525 kvm_s390_vcpu_start(vcpu);
4526 } else if (is_vcpu_stopped(vcpu)) {
4527 pr_err_ratelimited("can't run stopped vcpu %d\n",
4528 vcpu->vcpu_id);
4529 rc = -EINVAL;
4530 goto out;
4531 }
4532
4533 sync_regs(vcpu);
4534 enable_cpu_timer_accounting(vcpu);
4535
4536 might_fault();
4537 rc = __vcpu_run(vcpu);
4538
4539 if (signal_pending(current) && !rc) {
4540 kvm_run->exit_reason = KVM_EXIT_INTR;
4541 rc = -EINTR;
4542 }
4543
4544 if (guestdbg_exit_pending(vcpu) && !rc) {
4545 kvm_s390_prepare_debug_exit(vcpu);
4546 rc = 0;
4547 }
4548
4549 if (rc == -EREMOTE) {
4550 /* userspace support is needed, kvm_run has been prepared */
4551 rc = 0;
4552 }
4553
4554 disable_cpu_timer_accounting(vcpu);
4555 store_regs(vcpu);
4556
4557 kvm_sigset_deactivate(vcpu);
4558
4559 vcpu->stat.exit_userspace++;
4560 out:
4561 vcpu_put(vcpu);
4562 return rc;
4563 }
4564
4565 /*
4566 * store status at address
4567 * we use have two special cases:
4568 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4569 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4570 */
4571 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4572 {
4573 unsigned char archmode = 1;
4574 freg_t fprs[NUM_FPRS];
4575 unsigned int px;
4576 u64 clkcomp, cputm;
4577 int rc;
4578
4579 px = kvm_s390_get_prefix(vcpu);
4580 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4581 if (write_guest_abs(vcpu, 163, &archmode, 1))
4582 return -EFAULT;
4583 gpa = 0;
4584 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4585 if (write_guest_real(vcpu, 163, &archmode, 1))
4586 return -EFAULT;
4587 gpa = px;
4588 } else
4589 gpa -= __LC_FPREGS_SAVE_AREA;
4590
4591 /* manually convert vector registers if necessary */
4592 if (MACHINE_HAS_VX) {
4593 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4594 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4595 fprs, 128);
4596 } else {
4597 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4598 vcpu->run->s.regs.fprs, 128);
4599 }
4600 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4601 vcpu->run->s.regs.gprs, 128);
4602 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4603 &vcpu->arch.sie_block->gpsw, 16);
4604 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4605 &px, 4);
4606 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4607 &vcpu->run->s.regs.fpc, 4);
4608 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4609 &vcpu->arch.sie_block->todpr, 4);
4610 cputm = kvm_s390_get_cpu_timer(vcpu);
4611 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4612 &cputm, 8);
4613 clkcomp = vcpu->arch.sie_block->ckc >> 8;
4614 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4615 &clkcomp, 8);
4616 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4617 &vcpu->run->s.regs.acrs, 64);
4618 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4619 &vcpu->arch.sie_block->gcr, 128);
4620 return rc ? -EFAULT : 0;
4621 }
4622
4623 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4624 {
4625 /*
4626 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4627 * switch in the run ioctl. Let's update our copies before we save
4628 * it into the save area
4629 */
4630 save_fpu_regs();
4631 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4632 save_access_regs(vcpu->run->s.regs.acrs);
4633
4634 return kvm_s390_store_status_unloaded(vcpu, addr);
4635 }
4636
4637 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4638 {
4639 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4640 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4641 }
4642
4643 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4644 {
4645 unsigned int i;
4646 struct kvm_vcpu *vcpu;
4647
4648 kvm_for_each_vcpu(i, vcpu, kvm) {
4649 __disable_ibs_on_vcpu(vcpu);
4650 }
4651 }
4652
4653 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4654 {
4655 if (!sclp.has_ibs)
4656 return;
4657 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4658 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4659 }
4660
4661 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4662 {
4663 int i, online_vcpus, r = 0, started_vcpus = 0;
4664
4665 if (!is_vcpu_stopped(vcpu))
4666 return 0;
4667
4668 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4669 /* Only one cpu at a time may enter/leave the STOPPED state. */
4670 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4671 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4672
4673 /* Let's tell the UV that we want to change into the operating state */
4674 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4675 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4676 if (r) {
4677 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4678 return r;
4679 }
4680 }
4681
4682 for (i = 0; i < online_vcpus; i++) {
4683 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4684 started_vcpus++;
4685 }
4686
4687 if (started_vcpus == 0) {
4688 /* we're the only active VCPU -> speed it up */
4689 __enable_ibs_on_vcpu(vcpu);
4690 } else if (started_vcpus == 1) {
4691 /*
4692 * As we are starting a second VCPU, we have to disable
4693 * the IBS facility on all VCPUs to remove potentially
4694 * outstanding ENABLE requests.
4695 */
4696 __disable_ibs_on_all_vcpus(vcpu->kvm);
4697 }
4698
4699 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4700 /*
4701 * The real PSW might have changed due to a RESTART interpreted by the
4702 * ultravisor. We block all interrupts and let the next sie exit
4703 * refresh our view.
4704 */
4705 if (kvm_s390_pv_cpu_is_protected(vcpu))
4706 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4707 /*
4708 * Another VCPU might have used IBS while we were offline.
4709 * Let's play safe and flush the VCPU at startup.
4710 */
4711 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4712 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4713 return 0;
4714 }
4715
4716 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4717 {
4718 int i, online_vcpus, r = 0, started_vcpus = 0;
4719 struct kvm_vcpu *started_vcpu = NULL;
4720
4721 if (is_vcpu_stopped(vcpu))
4722 return 0;
4723
4724 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4725 /* Only one cpu at a time may enter/leave the STOPPED state. */
4726 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4727 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4728
4729 /* Let's tell the UV that we want to change into the stopped state */
4730 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4731 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4732 if (r) {
4733 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4734 return r;
4735 }
4736 }
4737
4738 /*
4739 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4740 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4741 * have been fully processed. This will ensure that the VCPU
4742 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4743 */
4744 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4745 kvm_s390_clear_stop_irq(vcpu);
4746
4747 __disable_ibs_on_vcpu(vcpu);
4748
4749 for (i = 0; i < online_vcpus; i++) {
4750 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4751 started_vcpus++;
4752 started_vcpu = vcpu->kvm->vcpus[i];
4753 }
4754 }
4755
4756 if (started_vcpus == 1) {
4757 /*
4758 * As we only have one VCPU left, we want to enable the
4759 * IBS facility for that VCPU to speed it up.
4760 */
4761 __enable_ibs_on_vcpu(started_vcpu);
4762 }
4763
4764 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4765 return 0;
4766 }
4767
4768 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4769 struct kvm_enable_cap *cap)
4770 {
4771 int r;
4772
4773 if (cap->flags)
4774 return -EINVAL;
4775
4776 switch (cap->cap) {
4777 case KVM_CAP_S390_CSS_SUPPORT:
4778 if (!vcpu->kvm->arch.css_support) {
4779 vcpu->kvm->arch.css_support = 1;
4780 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4781 trace_kvm_s390_enable_css(vcpu->kvm);
4782 }
4783 r = 0;
4784 break;
4785 default:
4786 r = -EINVAL;
4787 break;
4788 }
4789 return r;
4790 }
4791
4792 static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
4793 struct kvm_s390_mem_op *mop)
4794 {
4795 void __user *uaddr = (void __user *)mop->buf;
4796 int r = 0;
4797
4798 if (mop->flags || !mop->size)
4799 return -EINVAL;
4800 if (mop->size + mop->sida_offset < mop->size)
4801 return -EINVAL;
4802 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4803 return -E2BIG;
4804 if (!kvm_s390_pv_cpu_is_protected(vcpu))
4805 return -EINVAL;
4806
4807 switch (mop->op) {
4808 case KVM_S390_MEMOP_SIDA_READ:
4809 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4810 mop->sida_offset), mop->size))
4811 r = -EFAULT;
4812
4813 break;
4814 case KVM_S390_MEMOP_SIDA_WRITE:
4815 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4816 mop->sida_offset), uaddr, mop->size))
4817 r = -EFAULT;
4818 break;
4819 }
4820 return r;
4821 }
4822
4823 static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
4824 struct kvm_s390_mem_op *mop)
4825 {
4826 void __user *uaddr = (void __user *)mop->buf;
4827 void *tmpbuf = NULL;
4828 int r = 0;
4829 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4830 | KVM_S390_MEMOP_F_CHECK_ONLY
4831 | KVM_S390_MEMOP_F_SKEY_PROTECTION;
4832
4833 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4834 return -EINVAL;
4835 if (mop->size > MEM_OP_MAX_SIZE)
4836 return -E2BIG;
4837 if (kvm_s390_pv_cpu_is_protected(vcpu))
4838 return -EINVAL;
4839 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
4840 if (access_key_invalid(mop->key))
4841 return -EINVAL;
4842 } else {
4843 mop->key = 0;
4844 }
4845 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4846 tmpbuf = vmalloc(mop->size);
4847 if (!tmpbuf)
4848 return -ENOMEM;
4849 }
4850
4851 switch (mop->op) {
4852 case KVM_S390_MEMOP_LOGICAL_READ:
4853 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4854 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
4855 GACC_FETCH, mop->key);
4856 break;
4857 }
4858 r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
4859 mop->size, mop->key);
4860 if (r == 0) {
4861 if (copy_to_user(uaddr, tmpbuf, mop->size))
4862 r = -EFAULT;
4863 }
4864 break;
4865 case KVM_S390_MEMOP_LOGICAL_WRITE:
4866 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4867 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
4868 GACC_STORE, mop->key);
4869 break;
4870 }
4871 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4872 r = -EFAULT;
4873 break;
4874 }
4875 r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
4876 mop->size, mop->key);
4877 break;
4878 }
4879
4880 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4881 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4882
4883 vfree(tmpbuf);
4884 return r;
4885 }
4886
4887 static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
4888 struct kvm_s390_mem_op *mop)
4889 {
4890 int r, srcu_idx;
4891
4892 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4893
4894 switch (mop->op) {
4895 case KVM_S390_MEMOP_LOGICAL_READ:
4896 case KVM_S390_MEMOP_LOGICAL_WRITE:
4897 r = kvm_s390_vcpu_mem_op(vcpu, mop);
4898 break;
4899 case KVM_S390_MEMOP_SIDA_READ:
4900 case KVM_S390_MEMOP_SIDA_WRITE:
4901 /* we are locked against sida going away by the vcpu->mutex */
4902 r = kvm_s390_vcpu_sida_op(vcpu, mop);
4903 break;
4904 default:
4905 r = -EINVAL;
4906 }
4907
4908 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4909 return r;
4910 }
4911
4912 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4913 unsigned int ioctl, unsigned long arg)
4914 {
4915 struct kvm_vcpu *vcpu = filp->private_data;
4916 void __user *argp = (void __user *)arg;
4917
4918 switch (ioctl) {
4919 case KVM_S390_IRQ: {
4920 struct kvm_s390_irq s390irq;
4921
4922 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4923 return -EFAULT;
4924 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4925 }
4926 case KVM_S390_INTERRUPT: {
4927 struct kvm_s390_interrupt s390int;
4928 struct kvm_s390_irq s390irq = {};
4929
4930 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4931 return -EFAULT;
4932 if (s390int_to_s390irq(&s390int, &s390irq))
4933 return -EINVAL;
4934 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4935 }
4936 }
4937 return -ENOIOCTLCMD;
4938 }
4939
4940 long kvm_arch_vcpu_ioctl(struct file *filp,
4941 unsigned int ioctl, unsigned long arg)
4942 {
4943 struct kvm_vcpu *vcpu = filp->private_data;
4944 void __user *argp = (void __user *)arg;
4945 int idx;
4946 long r;
4947 u16 rc, rrc;
4948
4949 vcpu_load(vcpu);
4950
4951 switch (ioctl) {
4952 case KVM_S390_STORE_STATUS:
4953 idx = srcu_read_lock(&vcpu->kvm->srcu);
4954 r = kvm_s390_store_status_unloaded(vcpu, arg);
4955 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4956 break;
4957 case KVM_S390_SET_INITIAL_PSW: {
4958 psw_t psw;
4959
4960 r = -EFAULT;
4961 if (copy_from_user(&psw, argp, sizeof(psw)))
4962 break;
4963 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4964 break;
4965 }
4966 case KVM_S390_CLEAR_RESET:
4967 r = 0;
4968 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4969 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4970 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4971 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4972 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4973 rc, rrc);
4974 }
4975 break;
4976 case KVM_S390_INITIAL_RESET:
4977 r = 0;
4978 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4979 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4980 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4981 UVC_CMD_CPU_RESET_INITIAL,
4982 &rc, &rrc);
4983 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4984 rc, rrc);
4985 }
4986 break;
4987 case KVM_S390_NORMAL_RESET:
4988 r = 0;
4989 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4990 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4991 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4992 UVC_CMD_CPU_RESET, &rc, &rrc);
4993 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4994 rc, rrc);
4995 }
4996 break;
4997 case KVM_SET_ONE_REG:
4998 case KVM_GET_ONE_REG: {
4999 struct kvm_one_reg reg;
5000 r = -EINVAL;
5001 if (kvm_s390_pv_cpu_is_protected(vcpu))
5002 break;
5003 r = -EFAULT;
5004 if (copy_from_user(&reg, argp, sizeof(reg)))
5005 break;
5006 if (ioctl == KVM_SET_ONE_REG)
5007 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
5008 else
5009 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
5010 break;
5011 }
5012 #ifdef CONFIG_KVM_S390_UCONTROL
5013 case KVM_S390_UCAS_MAP: {
5014 struct kvm_s390_ucas_mapping ucasmap;
5015
5016 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5017 r = -EFAULT;
5018 break;
5019 }
5020
5021 if (!kvm_is_ucontrol(vcpu->kvm)) {
5022 r = -EINVAL;
5023 break;
5024 }
5025
5026 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
5027 ucasmap.vcpu_addr, ucasmap.length);
5028 break;
5029 }
5030 case KVM_S390_UCAS_UNMAP: {
5031 struct kvm_s390_ucas_mapping ucasmap;
5032
5033 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5034 r = -EFAULT;
5035 break;
5036 }
5037
5038 if (!kvm_is_ucontrol(vcpu->kvm)) {
5039 r = -EINVAL;
5040 break;
5041 }
5042
5043 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
5044 ucasmap.length);
5045 break;
5046 }
5047 #endif
5048 case KVM_S390_VCPU_FAULT: {
5049 r = gmap_fault(vcpu->arch.gmap, arg, 0);
5050 break;
5051 }
5052 case KVM_ENABLE_CAP:
5053 {
5054 struct kvm_enable_cap cap;
5055 r = -EFAULT;
5056 if (copy_from_user(&cap, argp, sizeof(cap)))
5057 break;
5058 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5059 break;
5060 }
5061 case KVM_S390_MEM_OP: {
5062 struct kvm_s390_mem_op mem_op;
5063
5064 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
5065 r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
5066 else
5067 r = -EFAULT;
5068 break;
5069 }
5070 case KVM_S390_SET_IRQ_STATE: {
5071 struct kvm_s390_irq_state irq_state;
5072
5073 r = -EFAULT;
5074 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5075 break;
5076 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
5077 irq_state.len == 0 ||
5078 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
5079 r = -EINVAL;
5080 break;
5081 }
5082 /* do not use irq_state.flags, it will break old QEMUs */
5083 r = kvm_s390_set_irq_state(vcpu,
5084 (void __user *) irq_state.buf,
5085 irq_state.len);
5086 break;
5087 }
5088 case KVM_S390_GET_IRQ_STATE: {
5089 struct kvm_s390_irq_state irq_state;
5090
5091 r = -EFAULT;
5092 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5093 break;
5094 if (irq_state.len == 0) {
5095 r = -EINVAL;
5096 break;
5097 }
5098 /* do not use irq_state.flags, it will break old QEMUs */
5099 r = kvm_s390_get_irq_state(vcpu,
5100 (__u8 __user *) irq_state.buf,
5101 irq_state.len);
5102 break;
5103 }
5104 default:
5105 r = -ENOTTY;
5106 }
5107
5108 vcpu_put(vcpu);
5109 return r;
5110 }
5111
5112 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5113 {
5114 #ifdef CONFIG_KVM_S390_UCONTROL
5115 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5116 && (kvm_is_ucontrol(vcpu->kvm))) {
5117 vmf->page = virt_to_page(vcpu->arch.sie_block);
5118 get_page(vmf->page);
5119 return 0;
5120 }
5121 #endif
5122 return VM_FAULT_SIGBUS;
5123 }
5124
5125 /* Section: memory related */
5126 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5127 struct kvm_memory_slot *memslot,
5128 const struct kvm_userspace_memory_region *mem,
5129 enum kvm_mr_change change)
5130 {
5131 /* A few sanity checks. We can have memory slots which have to be
5132 located/ended at a segment boundary (1MB). The memory in userland is
5133 ok to be fragmented into various different vmas. It is okay to mmap()
5134 and munmap() stuff in this slot after doing this call at any time */
5135
5136 if (mem->userspace_addr & 0xffffful)
5137 return -EINVAL;
5138
5139 if (mem->memory_size & 0xffffful)
5140 return -EINVAL;
5141
5142 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
5143 return -EINVAL;
5144
5145 /* When we are protected, we should not change the memory slots */
5146 if (kvm_s390_pv_get_handle(kvm))
5147 return -EINVAL;
5148 return 0;
5149 }
5150
5151 void kvm_arch_commit_memory_region(struct kvm *kvm,
5152 const struct kvm_userspace_memory_region *mem,
5153 struct kvm_memory_slot *old,
5154 const struct kvm_memory_slot *new,
5155 enum kvm_mr_change change)
5156 {
5157 int rc = 0;
5158
5159 switch (change) {
5160 case KVM_MR_DELETE:
5161 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5162 old->npages * PAGE_SIZE);
5163 break;
5164 case KVM_MR_MOVE:
5165 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5166 old->npages * PAGE_SIZE);
5167 if (rc)
5168 break;
5169 fallthrough;
5170 case KVM_MR_CREATE:
5171 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5172 mem->guest_phys_addr, mem->memory_size);
5173 break;
5174 case KVM_MR_FLAGS_ONLY:
5175 break;
5176 default:
5177 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5178 }
5179 if (rc)
5180 pr_warn("failed to commit memory region\n");
5181 return;
5182 }
5183
5184 static inline unsigned long nonhyp_mask(int i)
5185 {
5186 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5187
5188 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5189 }
5190
5191 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5192 {
5193 vcpu->valid_wakeup = false;
5194 }
5195
5196 static int __init kvm_s390_init(void)
5197 {
5198 int i;
5199
5200 if (!sclp.has_sief2) {
5201 pr_info("SIE is not available\n");
5202 return -ENODEV;
5203 }
5204
5205 if (nested && hpage) {
5206 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5207 return -EINVAL;
5208 }
5209
5210 for (i = 0; i < 16; i++)
5211 kvm_s390_fac_base[i] |=
5212 stfle_fac_list[i] & nonhyp_mask(i);
5213
5214 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5215 }
5216
5217 static void __exit kvm_s390_exit(void)
5218 {
5219 kvm_exit();
5220 }
5221
5222 module_init(kvm_s390_init);
5223 module_exit(kvm_s390_exit);
5224
5225 /*
5226 * Enable autoloading of the kvm module.
5227 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5228 * since x86 takes a different approach.
5229 */
5230 #include <linux/miscdevice.h>
5231 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5232 MODULE_ALIAS("devname:kvm");