// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2017
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};

struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				vcpu->arch.vsie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
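	/* the 0x100 bit in GR0 selects the PLO "test bit" (query) variant of function code nr */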
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		" plo 0,0,0,0(0)\n"
		" ipm %0\n"
		" srl %0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}

static void kvm_s390_cpu_feat_init(void)
{
	int i;

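	/* probe all 256 PLO function codes; record available ones MSB-first per byte */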
	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
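	/* 32 pages per debug area, 1 area, trace entries of 7 longs each */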
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
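		/* the limit depends on whether SCA entries are used and on the SCA format */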
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	default:
		r = 0;
	}
	return r;
}

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;
	struct kvm_memory_slot *ms;
	/* should be the only one */
	struct kvm_memslots *slots;
	unsigned long ram_pages;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_state)
		return 0;

	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
	if (!mgs)
		return -ENOMEM;
	kvm->arch.migration_state = mgs;

	if (kvm->arch.use_cmma) {
		/*
		 * Get the first slot. They are reverse sorted by base_gfn, so
		 * the first slot is also the one at the end of the address
		 * space. We have verified above that at least one slot is
		 * present.
		 */
		ms = slots->memslots;
		/* round up so we only use full longs */
		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
		/* allocate enough bytes to store all the bits */
		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
		if (!mgs->pgste_bitmap) {
			kfree(mgs);
			kvm->arch.migration_state = NULL;
			return -ENOMEM;
		}

		mgs->bitmap_size = ram_pages;
		atomic64_set(&mgs->dirty_pages, ram_pages);
		/* mark all the pages in active slots as dirty */
		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
			ms = slots->memslots + slotnr;
			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
		}

		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	}
	return 0;
}

/*
 * Must be called with kvm->lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;

	/* migration mode already disabled */
	if (!kvm->arch.migration_state)
		return 0;
	mgs = kvm->arch.migration_state;
	kvm->arch.migration_state = NULL;

	if (kvm->arch.use_cmma) {
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
		vfree(mgs->pgste_bitmap);
	}
	kfree(mgs);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int idx, res = -ENXIO;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		idx = srcu_read_lock(&kvm->srcu);
		res = kvm_s390_vm_start_migration(kvm);
		srcu_read_unlock(&kvm->srcu, idx);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = (kvm->arch.migration_state != NULL);

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (test_kvm_facility(kvm, 139))
		kvm_s390_set_tod_clock_ext(kvm, &gtod);
	else if (gtod.epoch_idx == 0)
		kvm_s390_set_tod_clock(kvm, gtod.tod);
	else
		return -EINVAL;

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
				       struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;

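	/* carry into the epoch index if adding the epoch wrapped the 64-bit TOD */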
	if (gtod->tod < htod.tod)
		gtod->epoch_idx += 1;

	preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));

	if (test_kvm_facility(kvm, 139))
		kvm_s390_get_tod_clock_ext(kvm, &gtod);
	else
		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);

	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;
	int ret = -EBUSY;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
		ret = 0;
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}

/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)

/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
	unsigned long bufsize, hva, pgstev, i, next, cur;
	int srcu_idx, peek, r = 0, rr;
	u8 *res;

	cur = args->start_gfn;
	i = next = pgstev = 0;

	if (unlikely(!kvm->arch.use_cmma))
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !s)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.use_cmma) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	if (!peek) {
		/* We are not peeking, and there are no dirty pages */
		if (!atomic64_read(&s->dirty_pages)) {
			memset(args, 0, sizeof(*args));
			return 0;
		}
		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
				    args->start_gfn);
		if (cur >= s->bitmap_size)	/* nothing found, loop back */
			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
			memset(args, 0, sizeof(*args));
			return 0;
		}
		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
	}

	res = vmalloc(bufsize);
	if (!res)
		return -ENOMEM;

	args->start_gfn = cur;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < bufsize) {
		hva = gfn_to_hva(kvm, cur);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}
		/* decrement only if we actually flipped the bit to 0 */
		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
			atomic64_dec(&s->dirty_pages);
		r = get_pgste(kvm->mm, hva, &pgstev);
		if (r < 0)
			pgstev = 0;
		/* save the value */
		res[i++] = (pgstev >> 24) & 0x43;
		/*
		 * if the next bit is too far away, stop.
		 * if we reached the previous "next", find the next one
		 */
		if (!peek) {
			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
				break;
			if (cur == next)
				next = find_next_bit(s->pgste_bitmap,
						     s->bitmap_size, cur + 1);
			/* reached the end of the bitmap or of the buffer, stop */
			if ((next >= s->bitmap_size) ||
			    (next >= args->start_gfn + bufsize))
				break;
		}
		cur++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);
	args->count = i;
	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;

	rr = copy_to_user((void __user *)args->values, res, args->count);
	if (rr)
		r = -EFAULT;

	vfree(res);
	return r;
}

/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.use_cmma flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(sizeof(*bits) * args->count);
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.use_cmma) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.use_cmma = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		r = kvm_s390_get_cmma_bits(kvm, &args);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		r = kvm_s390_set_cmma_bits(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}

static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}

static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}

static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}

static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
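	/* stagger the SCA within its page so that multiple VMs use different cache lines */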
1893 spin_lock(&kvm_lock);
1894 sca_offset += 16;
1895 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1896 sca_offset = 0;
1897 kvm->arch.sca = (struct bsca_block *)
1898 ((char *) kvm->arch.sca + sca_offset);
1899 spin_unlock(&kvm_lock);
1900
1901 sprintf(debug_name, "kvm-%u", current->pid);
1902
1903 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1904 if (!kvm->arch.dbf)
1905 goto out_err;
1906
1907 kvm->arch.sie_page2 =
1908 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1909 if (!kvm->arch.sie_page2)
1910 goto out_err;
1911
1912 /* Populate the facility mask initially. */
1913 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1914 sizeof(S390_lowcore.stfle_fac_list));
1915 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1916 if (i < kvm_s390_fac_list_mask_size())
1917 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1918 else
1919 kvm->arch.model.fac_mask[i] = 0UL;
1920 }
1921
1922 /* Populate the facility list initially. */
1923 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1924 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1925 S390_ARCH_FAC_LIST_SIZE_BYTE);
1926
1927 /* we are always in czam mode - even on pre z14 machines */
1928 set_kvm_facility(kvm->arch.model.fac_mask, 138);
1929 set_kvm_facility(kvm->arch.model.fac_list, 138);
1930 /* we emulate STHYI in kvm */
1931 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1932 set_kvm_facility(kvm->arch.model.fac_list, 74);
1933 if (MACHINE_HAS_TLB_GUEST) {
1934 set_kvm_facility(kvm->arch.model.fac_mask, 147);
1935 set_kvm_facility(kvm->arch.model.fac_list, 147);
1936 }
1937
1938 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1939 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1940
1941 kvm_s390_crypto_init(kvm);
1942
1943 mutex_init(&kvm->arch.float_int.ais_lock);
1944 kvm->arch.float_int.simm = 0;
1945 kvm->arch.float_int.nimm = 0;
1946 spin_lock_init(&kvm->arch.float_int.lock);
1947 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1948 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1949 init_waitqueue_head(&kvm->arch.ipte_wq);
1950 mutex_init(&kvm->arch.ipte_mutex);
1951
1952 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1953 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1954
1955 if (type & KVM_VM_S390_UCONTROL) {
1956 kvm->arch.gmap = NULL;
1957 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1958 } else {
1959 if (sclp.hamax == U64_MAX)
1960 kvm->arch.mem_limit = TASK_SIZE_MAX;
1961 else
1962 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1963 sclp.hamax + 1);
1964 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1965 if (!kvm->arch.gmap)
1966 goto out_err;
1967 kvm->arch.gmap->private = kvm;
1968 kvm->arch.gmap->pfault_enabled = 0;
1969 }
1970
1971 kvm->arch.css_support = 0;
1972 kvm->arch.use_irqchip = 0;
1973 kvm->arch.epoch = 0;
1974
1975 spin_lock_init(&kvm->arch.start_stop_lock);
1976 kvm_s390_vsie_init(kvm);
1977 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1978
1979 return 0;
1980 out_err:
1981 free_page((unsigned long)kvm->arch.sie_page2);
1982 debug_unregister(kvm->arch.dbf);
1983 sca_dispose(kvm);
1984 KVM_EVENT(3, "creation of vm failed: %d", rc);
1985 return rc;
1986 }
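/*
 * Illustrative userspace sketch (not part of this file; "kvm_fd" is an
 * assumed descriptor for /dev/kvm): the type argument checked above comes
 * straight from KVM_CREATE_VM. Passing 0 requests a regular VM,
 * KVM_VM_S390_UCONTROL a user-controlled one.
 *
 *	int vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
 *	if (vm_fd < 0)
 *		err(1, "KVM_CREATE_VM");
 */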
1987
1988 bool kvm_arch_has_vcpu_debugfs(void)
1989 {
1990 return false;
1991 }
1992
1993 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1994 {
1995 return 0;
1996 }
1997
1998 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1999 {
2000 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2001 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2002 kvm_s390_clear_local_irqs(vcpu);
2003 kvm_clear_async_pf_completion_queue(vcpu);
2004 if (!kvm_is_ucontrol(vcpu->kvm))
2005 sca_del_vcpu(vcpu);
2006
2007 if (kvm_is_ucontrol(vcpu->kvm))
2008 gmap_remove(vcpu->arch.gmap);
2009
2010 if (vcpu->kvm->arch.use_cmma)
2011 kvm_s390_vcpu_unsetup_cmma(vcpu);
2012 free_page((unsigned long)(vcpu->arch.sie_block));
2013
2014 kvm_vcpu_uninit(vcpu);
2015 kmem_cache_free(kvm_vcpu_cache, vcpu);
2016 }
2017
2018 static void kvm_free_vcpus(struct kvm *kvm)
2019 {
2020 unsigned int i;
2021 struct kvm_vcpu *vcpu;
2022
2023 kvm_for_each_vcpu(i, vcpu, kvm)
2024 kvm_arch_vcpu_destroy(vcpu);
2025
2026 mutex_lock(&kvm->lock);
2027 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2028 kvm->vcpus[i] = NULL;
2029
2030 atomic_set(&kvm->online_vcpus, 0);
2031 mutex_unlock(&kvm->lock);
2032 }
2033
2034 void kvm_arch_destroy_vm(struct kvm *kvm)
2035 {
2036 kvm_free_vcpus(kvm);
2037 sca_dispose(kvm);
2038 debug_unregister(kvm->arch.dbf);
2039 free_page((unsigned long)kvm->arch.sie_page2);
2040 if (!kvm_is_ucontrol(kvm))
2041 gmap_remove(kvm->arch.gmap);
2042 kvm_s390_destroy_adapters(kvm);
2043 kvm_s390_clear_float_irqs(kvm);
2044 kvm_s390_vsie_destroy(kvm);
2045 if (kvm->arch.migration_state) {
2046 vfree(kvm->arch.migration_state->pgste_bitmap);
2047 kfree(kvm->arch.migration_state);
2048 }
2049 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2050 }
2051
2052 /* Section: vcpu related */
2053 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2054 {
2055 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2056 if (!vcpu->arch.gmap)
2057 return -ENOMEM;
2058 vcpu->arch.gmap->private = vcpu->kvm;
2059
2060 return 0;
2061 }
2062
2063 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2064 {
2065 if (!kvm_s390_use_sca_entries())
2066 return;
2067 read_lock(&vcpu->kvm->arch.sca_lock);
2068 if (vcpu->kvm->arch.use_esca) {
2069 struct esca_block *sca = vcpu->kvm->arch.sca;
2070
2071 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2072 sca->cpu[vcpu->vcpu_id].sda = 0;
2073 } else {
2074 struct bsca_block *sca = vcpu->kvm->arch.sca;
2075
2076 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2077 sca->cpu[vcpu->vcpu_id].sda = 0;
2078 }
2079 read_unlock(&vcpu->kvm->arch.sca_lock);
2080 }
2081
2082 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2083 {
2084 if (!kvm_s390_use_sca_entries()) {
2085 struct bsca_block *sca = vcpu->kvm->arch.sca;
2086
2087 /* we still need the basic sca for the ipte control */
2088 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2089 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2090 }
2091 read_lock(&vcpu->kvm->arch.sca_lock);
2092 if (vcpu->kvm->arch.use_esca) {
2093 struct esca_block *sca = vcpu->kvm->arch.sca;
2094
2095 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2096 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2097 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2098 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2099 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2100 } else {
2101 struct bsca_block *sca = vcpu->kvm->arch.sca;
2102
2103 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2104 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2105 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2106 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2107 }
2108 read_unlock(&vcpu->kvm->arch.sca_lock);
2109 }
2110
2111 /* Basic SCA to Extended SCA data copy routines */
2112 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2113 {
2114 d->sda = s->sda;
2115 d->sigp_ctrl.c = s->sigp_ctrl.c;
2116 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2117 }
2118
2119 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2120 {
2121 int i;
2122
2123 d->ipte_control = s->ipte_control;
2124 d->mcn[0] = s->mcn;
2125 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2126 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2127 }
2128
2129 static int sca_switch_to_extended(struct kvm *kvm)
2130 {
2131 struct bsca_block *old_sca = kvm->arch.sca;
2132 struct esca_block *new_sca;
2133 struct kvm_vcpu *vcpu;
2134 unsigned int vcpu_idx;
2135 u32 scaol, scaoh;
2136
2137 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2138 if (!new_sca)
2139 return -ENOMEM;
2140
2141 scaoh = (u32)((u64)(new_sca) >> 32);
2142 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2143
2144 kvm_s390_vcpu_block_all(kvm);
2145 write_lock(&kvm->arch.sca_lock);
2146
2147 sca_copy_b_to_e(new_sca, old_sca);
2148
2149 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2150 vcpu->arch.sie_block->scaoh = scaoh;
2151 vcpu->arch.sie_block->scaol = scaol;
2152 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2153 }
2154 kvm->arch.sca = new_sca;
2155 kvm->arch.use_esca = 1;
2156
2157 write_unlock(&kvm->arch.sca_lock);
2158 kvm_s390_vcpu_unblock_all(kvm);
2159
2160 free_page((unsigned long)old_sca);
2161
2162 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2163 old_sca, kvm->arch.sca);
2164 return 0;
2165 }
2166
2167 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2168 {
2169 int rc;
2170
2171 if (!kvm_s390_use_sca_entries()) {
2172 if (id < KVM_MAX_VCPUS)
2173 return true;
2174 return false;
2175 }
2176 if (id < KVM_S390_BSCA_CPU_SLOTS)
2177 return true;
2178 if (!sclp.has_esca || !sclp.has_64bscao)
2179 return false;
2180
2181 mutex_lock(&kvm->lock);
2182 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2183 mutex_unlock(&kvm->lock);
2184
2185 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2186 }
2187
2188 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2189 {
2190 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2191 kvm_clear_async_pf_completion_queue(vcpu);
2192 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2193 KVM_SYNC_GPRS |
2194 KVM_SYNC_ACRS |
2195 KVM_SYNC_CRS |
2196 KVM_SYNC_ARCH0 |
2197 KVM_SYNC_PFAULT;
2198 kvm_s390_set_prefix(vcpu, 0);
2199 if (test_kvm_facility(vcpu->kvm, 64))
2200 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2201 if (test_kvm_facility(vcpu->kvm, 133))
2202 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2203 /* fprs can be synchronized via vrs, even if the guest has no vx. With
2204 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2205 */
2206 if (MACHINE_HAS_VX)
2207 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2208 else
2209 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2210
2211 if (kvm_is_ucontrol(vcpu->kvm))
2212 return __kvm_ucontrol_vcpu_init(vcpu);
2213
2214 return 0;
2215 }
2216
2217 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2218 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2219 {
2220 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2221 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2222 vcpu->arch.cputm_start = get_tod_clock_fast();
2223 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2224 }
2225
2226 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2227 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2228 {
2229 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2230 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2231 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2232 vcpu->arch.cputm_start = 0;
2233 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2234 }
2235
2236 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2237 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2238 {
2239 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2240 vcpu->arch.cputm_enabled = true;
2241 __start_cpu_timer_accounting(vcpu);
2242 }
2243
2244 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2245 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2246 {
2247 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2248 __stop_cpu_timer_accounting(vcpu);
2249 vcpu->arch.cputm_enabled = false;
2250 }
2251
2252 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2253 {
2254 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2255 __enable_cpu_timer_accounting(vcpu);
2256 preempt_enable();
2257 }
2258
2259 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2260 {
2261 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2262 __disable_cpu_timer_accounting(vcpu);
2263 preempt_enable();
2264 }
2265
2266 /* set the cpu timer - may only be called from the VCPU thread itself */
2267 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2268 {
2269 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2270 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2271 if (vcpu->arch.cputm_enabled)
2272 vcpu->arch.cputm_start = get_tod_clock_fast();
2273 vcpu->arch.sie_block->cputm = cputm;
2274 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2275 preempt_enable();
2276 }
2277
2278 /* update and get the cpu timer - can also be called from other VCPU threads */
2279 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2280 {
2281 unsigned int seq;
2282 __u64 value;
2283
2284 if (unlikely(!vcpu->arch.cputm_enabled))
2285 return vcpu->arch.sie_block->cputm;
2286
2287 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2288 do {
2289 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2290 /*
2291 * If the writer would ever execute a read in the critical
2292 * section, e.g. in irq context, we have a deadlock.
2293 */
2294 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2295 value = vcpu->arch.sie_block->cputm;
2296 /* if cputm_start is 0, accounting is being started/stopped */
2297 if (likely(vcpu->arch.cputm_start))
2298 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2299 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2300 preempt_enable();
2301 return value;
2302 }
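/*
 * Reader pattern sketch (illustrative, not part of this file): the loop
 * above is an open-coded variant of the generic seqcount read side, shown
 * here against an assumed seqcount_t "sc" protecting "shared_value":
 *
 *	unsigned int seq;
 *	u64 v;
 *	do {
 *		seq = read_seqcount_begin(&sc);
 *		v = shared_value;
 *	} while (read_seqcount_retry(&sc, seq));
 *
 * The variant above additionally masks out the low (write-in-progress) bit
 * so that the VCPU thread itself may read while it holds the sequence odd.
 */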
2303
2304 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2305 {
2306
2307 gmap_enable(vcpu->arch.enabled_gmap);
2308 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2309 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2310 __start_cpu_timer_accounting(vcpu);
2311 vcpu->cpu = cpu;
2312 }
2313
2314 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2315 {
2316 vcpu->cpu = -1;
2317 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2318 __stop_cpu_timer_accounting(vcpu);
2319 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2320 vcpu->arch.enabled_gmap = gmap_get_enabled();
2321 gmap_disable(vcpu->arch.enabled_gmap);
2322
2323 }
2324
2325 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2326 {
2327 /* this equals the initial cpu reset in the PoP, but we don't switch to ESA */
2328 vcpu->arch.sie_block->gpsw.mask = 0UL;
2329 vcpu->arch.sie_block->gpsw.addr = 0UL;
2330 kvm_s390_set_prefix(vcpu, 0);
2331 kvm_s390_set_cpu_timer(vcpu, 0);
2332 vcpu->arch.sie_block->ckc = 0UL;
2333 vcpu->arch.sie_block->todpr = 0;
2334 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2335 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
2336 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2337 /* make sure the new fpc will be lazily loaded */
2338 save_fpu_regs();
2339 current->thread.fpu.fpc = 0;
2340 vcpu->arch.sie_block->gbea = 1;
2341 vcpu->arch.sie_block->pp = 0;
2342 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2343 kvm_clear_async_pf_completion_queue(vcpu);
2344 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2345 kvm_s390_vcpu_stop(vcpu);
2346 kvm_s390_clear_local_irqs(vcpu);
2347 }
2348
2349 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2350 {
2351 mutex_lock(&vcpu->kvm->lock);
2352 preempt_disable();
2353 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2354 preempt_enable();
2355 mutex_unlock(&vcpu->kvm->lock);
2356 if (!kvm_is_ucontrol(vcpu->kvm)) {
2357 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2358 sca_add_vcpu(vcpu);
2359 }
2360 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2361 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2362 /* make vcpu_load load the right gmap on the first trigger */
2363 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2364 }
2365
2366 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2367 {
2368 if (!test_kvm_facility(vcpu->kvm, 76))
2369 return;
2370
2371 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2372
2373 if (vcpu->kvm->arch.crypto.aes_kw)
2374 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2375 if (vcpu->kvm->arch.crypto.dea_kw)
2376 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2377
2378 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2379 }
2380
2381 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2382 {
2383 free_page(vcpu->arch.sie_block->cbrlo);
2384 vcpu->arch.sie_block->cbrlo = 0;
2385 }
2386
2387 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2388 {
2389 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2390 if (!vcpu->arch.sie_block->cbrlo)
2391 return -ENOMEM;
2392
2393 vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2394 return 0;
2395 }
2396
2397 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2398 {
2399 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2400
2401 vcpu->arch.sie_block->ibc = model->ibc;
2402 if (test_kvm_facility(vcpu->kvm, 7))
2403 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2404 }
2405
2406 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2407 {
2408 int rc = 0;
2409
2410 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2411 CPUSTAT_SM |
2412 CPUSTAT_STOPPED);
2413
2414 if (test_kvm_facility(vcpu->kvm, 78))
2415 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2416 else if (test_kvm_facility(vcpu->kvm, 8))
2417 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2418
2419 kvm_s390_vcpu_setup_model(vcpu);
2420
2421 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2422 if (MACHINE_HAS_ESOP)
2423 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2424 if (test_kvm_facility(vcpu->kvm, 9))
2425 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2426 if (test_kvm_facility(vcpu->kvm, 73))
2427 vcpu->arch.sie_block->ecb |= ECB_TE;
2428
2429 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2430 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2431 if (test_kvm_facility(vcpu->kvm, 130))
2432 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2433 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2434 if (sclp.has_cei)
2435 vcpu->arch.sie_block->eca |= ECA_CEI;
2436 if (sclp.has_ib)
2437 vcpu->arch.sie_block->eca |= ECA_IB;
2438 if (sclp.has_siif)
2439 vcpu->arch.sie_block->eca |= ECA_SII;
2440 if (sclp.has_sigpif)
2441 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2442 if (test_kvm_facility(vcpu->kvm, 129)) {
2443 vcpu->arch.sie_block->eca |= ECA_VX;
2444 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2445 }
2446 if (test_kvm_facility(vcpu->kvm, 139))
2447 vcpu->arch.sie_block->ecd |= ECD_MEF;
2448
2449 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2450 | SDNXC;
2451 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2452
2453 if (sclp.has_kss)
2454 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2455 else
2456 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2457
2458 if (vcpu->kvm->arch.use_cmma) {
2459 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2460 if (rc)
2461 return rc;
2462 }
2463 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2464 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2465
2466 kvm_s390_vcpu_crypto_setup(vcpu);
2467
2468 return rc;
2469 }
2470
2471 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2472 unsigned int id)
2473 {
2474 struct kvm_vcpu *vcpu;
2475 struct sie_page *sie_page;
2476 int rc = -EINVAL;
2477
2478 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2479 goto out;
2480
2481 rc = -ENOMEM;
2482
2483 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2484 if (!vcpu)
2485 goto out;
2486
2487 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2488 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2489 if (!sie_page)
2490 goto out_free_cpu;
2491
2492 vcpu->arch.sie_block = &sie_page->sie_block;
2493 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2494
2495 /* the real guest size will always be smaller than msl */
2496 vcpu->arch.sie_block->mso = 0;
2497 vcpu->arch.sie_block->msl = sclp.hamax;
2498
2499 vcpu->arch.sie_block->icpua = id;
2500 spin_lock_init(&vcpu->arch.local_int.lock);
2501 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2502 vcpu->arch.local_int.wq = &vcpu->wq;
2503 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2504 seqcount_init(&vcpu->arch.cputm_seqcount);
2505
2506 rc = kvm_vcpu_init(vcpu, kvm, id);
2507 if (rc)
2508 goto out_free_sie_block;
2509 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2510 vcpu->arch.sie_block);
2511 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2512
2513 return vcpu;
2514 out_free_sie_block:
2515 free_page((unsigned long)(vcpu->arch.sie_block));
2516 out_free_cpu:
2517 kmem_cache_free(kvm_vcpu_cache, vcpu);
2518 out:
2519 return ERR_PTR(rc);
2520 }
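/*
 * Illustrative userspace sketch (assumed "vm_fd" and "kvm_fd" descriptors):
 * this function is reached through the common KVM_CREATE_VCPU ioctl, and
 * the kvm_run area is then mapped from the new vcpu fd.
 *
 *	int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
 *	long sz = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
 *	struct kvm_run *run = mmap(NULL, sz, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 */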
2521
2522 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2523 {
2524 return kvm_s390_vcpu_has_irq(vcpu, 0);
2525 }
2526
2527 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2528 {
2529 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2530 }
2531
2532 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2533 {
2534 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2535 exit_sie(vcpu);
2536 }
2537
2538 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2539 {
2540 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2541 }
2542
2543 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2544 {
2545 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2546 exit_sie(vcpu);
2547 }
2548
2549 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2550 {
2551 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2552 }
2553
2554 /*
2555 * Kick a guest cpu out of SIE and wait until SIE is not running.
2556 * If the CPU is not running (e.g. waiting while idle), the function
2557 * returns immediately. */
2558 void exit_sie(struct kvm_vcpu *vcpu)
2559 {
2560 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2561 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2562 cpu_relax();
2563 }
2564
2565 /* Kick a guest cpu out of SIE to process a request synchronously */
2566 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2567 {
2568 kvm_make_request(req, vcpu);
2569 kvm_s390_vcpu_request(vcpu);
2570 }
2571
2572 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2573 unsigned long end)
2574 {
2575 struct kvm *kvm = gmap->private;
2576 struct kvm_vcpu *vcpu;
2577 unsigned long prefix;
2578 int i;
2579
2580 if (gmap_is_shadow(gmap))
2581 return;
2582 if (start >= 1UL << 31)
2583 /* We are only interested in prefix pages */
2584 return;
2585 kvm_for_each_vcpu(i, vcpu, kvm) {
2586 /* match against both prefix pages */
2587 prefix = kvm_s390_get_prefix(vcpu);
2588 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2589 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2590 start, end);
2591 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2592 }
2593 }
2594 }
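/*
 * Worked example (illustrative): with 4K pages and a prefix of 0x8000, the
 * two prefix pages cover 0x8000-0x9fff. A notifier range of 0x9000-0x9fff
 * satisfies prefix <= end and start <= prefix + 2*PAGE_SIZE - 1, so the
 * vcpu gets a KVM_REQ_MMU_RELOAD request.
 */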
2595
2596 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2597 {
2598 /* kvm common code refers to this, but never calls it */
2599 BUG();
2600 return 0;
2601 }
2602
2603 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2604 struct kvm_one_reg *reg)
2605 {
2606 int r = -EINVAL;
2607
2608 switch (reg->id) {
2609 case KVM_REG_S390_TODPR:
2610 r = put_user(vcpu->arch.sie_block->todpr,
2611 (u32 __user *)reg->addr);
2612 break;
2613 case KVM_REG_S390_EPOCHDIFF:
2614 r = put_user(vcpu->arch.sie_block->epoch,
2615 (u64 __user *)reg->addr);
2616 break;
2617 case KVM_REG_S390_CPU_TIMER:
2618 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2619 (u64 __user *)reg->addr);
2620 break;
2621 case KVM_REG_S390_CLOCK_COMP:
2622 r = put_user(vcpu->arch.sie_block->ckc,
2623 (u64 __user *)reg->addr);
2624 break;
2625 case KVM_REG_S390_PFTOKEN:
2626 r = put_user(vcpu->arch.pfault_token,
2627 (u64 __user *)reg->addr);
2628 break;
2629 case KVM_REG_S390_PFCOMPARE:
2630 r = put_user(vcpu->arch.pfault_compare,
2631 (u64 __user *)reg->addr);
2632 break;
2633 case KVM_REG_S390_PFSELECT:
2634 r = put_user(vcpu->arch.pfault_select,
2635 (u64 __user *)reg->addr);
2636 break;
2637 case KVM_REG_S390_PP:
2638 r = put_user(vcpu->arch.sie_block->pp,
2639 (u64 __user *)reg->addr);
2640 break;
2641 case KVM_REG_S390_GBEA:
2642 r = put_user(vcpu->arch.sie_block->gbea,
2643 (u64 __user *)reg->addr);
2644 break;
2645 default:
2646 break;
2647 }
2648
2649 return r;
2650 }
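/*
 * Illustrative userspace sketch (assumed "vcpu_fd"): fetching one of the
 * registers handled above, here the CPU timer.
 *
 *	__u64 cputm;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&cputm,
 *	};
 *	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
 *		err(1, "KVM_GET_ONE_REG");
 */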
2651
2652 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2653 struct kvm_one_reg *reg)
2654 {
2655 int r = -EINVAL;
2656 __u64 val;
2657
2658 switch (reg->id) {
2659 case KVM_REG_S390_TODPR:
2660 r = get_user(vcpu->arch.sie_block->todpr,
2661 (u32 __user *)reg->addr);
2662 break;
2663 case KVM_REG_S390_EPOCHDIFF:
2664 r = get_user(vcpu->arch.sie_block->epoch,
2665 (u64 __user *)reg->addr);
2666 break;
2667 case KVM_REG_S390_CPU_TIMER:
2668 r = get_user(val, (u64 __user *)reg->addr);
2669 if (!r)
2670 kvm_s390_set_cpu_timer(vcpu, val);
2671 break;
2672 case KVM_REG_S390_CLOCK_COMP:
2673 r = get_user(vcpu->arch.sie_block->ckc,
2674 (u64 __user *)reg->addr);
2675 break;
2676 case KVM_REG_S390_PFTOKEN:
2677 r = get_user(vcpu->arch.pfault_token,
2678 (u64 __user *)reg->addr);
2679 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2680 kvm_clear_async_pf_completion_queue(vcpu);
2681 break;
2682 case KVM_REG_S390_PFCOMPARE:
2683 r = get_user(vcpu->arch.pfault_compare,
2684 (u64 __user *)reg->addr);
2685 break;
2686 case KVM_REG_S390_PFSELECT:
2687 r = get_user(vcpu->arch.pfault_select,
2688 (u64 __user *)reg->addr);
2689 break;
2690 case KVM_REG_S390_PP:
2691 r = get_user(vcpu->arch.sie_block->pp,
2692 (u64 __user *)reg->addr);
2693 break;
2694 case KVM_REG_S390_GBEA:
2695 r = get_user(vcpu->arch.sie_block->gbea,
2696 (u64 __user *)reg->addr);
2697 break;
2698 default:
2699 break;
2700 }
2701
2702 return r;
2703 }
2704
2705 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2706 {
2707 kvm_s390_vcpu_initial_reset(vcpu);
2708 return 0;
2709 }
2710
2711 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2712 {
2713 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2714 return 0;
2715 }
2716
2717 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2718 {
2719 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2720 return 0;
2721 }
2722
2723 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2724 struct kvm_sregs *sregs)
2725 {
2726 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2727 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2728 return 0;
2729 }
2730
2731 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2732 struct kvm_sregs *sregs)
2733 {
2734 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2735 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2736 return 0;
2737 }
2738
2739 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2740 {
2741 if (test_fp_ctl(fpu->fpc))
2742 return -EINVAL;
2743 vcpu->run->s.regs.fpc = fpu->fpc;
2744 if (MACHINE_HAS_VX)
2745 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2746 (freg_t *) fpu->fprs);
2747 else
2748 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2749 return 0;
2750 }
2751
2752 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2753 {
2754 /* make sure we have the latest values */
2755 save_fpu_regs();
2756 if (MACHINE_HAS_VX)
2757 convert_vx_to_fp((freg_t *) fpu->fprs,
2758 (__vector128 *) vcpu->run->s.regs.vrs);
2759 else
2760 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2761 fpu->fpc = vcpu->run->s.regs.fpc;
2762 return 0;
2763 }
2764
2765 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2766 {
2767 int rc = 0;
2768
2769 if (!is_vcpu_stopped(vcpu))
2770 rc = -EBUSY;
2771 else {
2772 vcpu->run->psw_mask = psw.mask;
2773 vcpu->run->psw_addr = psw.addr;
2774 }
2775 return rc;
2776 }
2777
2778 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2779 struct kvm_translation *tr)
2780 {
2781 return -EINVAL; /* not implemented yet */
2782 }
2783
2784 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2785 KVM_GUESTDBG_USE_HW_BP | \
2786 KVM_GUESTDBG_ENABLE)
2787
2788 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2789 struct kvm_guest_debug *dbg)
2790 {
2791 int rc = 0;
2792
2793 vcpu->guest_debug = 0;
2794 kvm_s390_clear_bp_data(vcpu);
2795
2796 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2797 return -EINVAL;
2798 if (!sclp.has_gpere)
2799 return -EINVAL;
2800
2801 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2802 vcpu->guest_debug = dbg->control;
2803 /* enforce guest PER */
2804 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2805
2806 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2807 rc = kvm_s390_import_bp_data(vcpu, dbg);
2808 } else {
2809 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2810 vcpu->arch.guestdbg.last_bp = 0;
2811 }
2812
2813 if (rc) {
2814 vcpu->guest_debug = 0;
2815 kvm_s390_clear_bp_data(vcpu);
2816 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2817 }
2818
2819 return rc;
2820 }
2821
2822 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2823 struct kvm_mp_state *mp_state)
2824 {
2825 /* CHECK_STOP and LOAD are not supported yet */
2826 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2827 KVM_MP_STATE_OPERATING;
2828 }
2829
2830 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2831 struct kvm_mp_state *mp_state)
2832 {
2833 int rc = 0;
2834
2835 /* user space knows about this interface - let it control the state */
2836 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2837
2838 switch (mp_state->mp_state) {
2839 case KVM_MP_STATE_STOPPED:
2840 kvm_s390_vcpu_stop(vcpu);
2841 break;
2842 case KVM_MP_STATE_OPERATING:
2843 kvm_s390_vcpu_start(vcpu);
2844 break;
2845 case KVM_MP_STATE_LOAD:
2846 case KVM_MP_STATE_CHECK_STOP:
2847 /* fall through - CHECK_STOP and LOAD are not supported yet */
2848 default:
2849 rc = -ENXIO;
2850 }
2851
2852 return rc;
2853 }
2854
2855 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2856 {
2857 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2858 }
2859
2860 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2861 {
2862 retry:
2863 kvm_s390_vcpu_request_handled(vcpu);
2864 if (!kvm_request_pending(vcpu))
2865 return 0;
2866 /*
2867 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2868 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2869 * This ensures that the ipte instruction for this request has
2870 * already finished. We might race against a second unmapper that
2871 * wants to set the blocking bit. Let's just retry the request loop.
2872 */
2873 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2874 int rc;
2875 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2876 kvm_s390_get_prefix(vcpu),
2877 PAGE_SIZE * 2, PROT_WRITE);
2878 if (rc) {
2879 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2880 return rc;
2881 }
2882 goto retry;
2883 }
2884
2885 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2886 vcpu->arch.sie_block->ihcpu = 0xffff;
2887 goto retry;
2888 }
2889
2890 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2891 if (!ibs_enabled(vcpu)) {
2892 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2893 atomic_or(CPUSTAT_IBS,
2894 &vcpu->arch.sie_block->cpuflags);
2895 }
2896 goto retry;
2897 }
2898
2899 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2900 if (ibs_enabled(vcpu)) {
2901 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2902 atomic_andnot(CPUSTAT_IBS,
2903 &vcpu->arch.sie_block->cpuflags);
2904 }
2905 goto retry;
2906 }
2907
2908 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2909 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2910 goto retry;
2911 }
2912
2913 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2914 /*
2915 * Disable CMMA virtualization; we will emulate the ESSA
2916 * instruction manually, in order to provide the additional
2917 * functionality needed for live migration.
2918 */
2919 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2920 goto retry;
2921 }
2922
2923 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2924 /*
2925 * Re-enable CMMA virtualization if CMMA is available and
2926 * was used.
2927 */
2928 if ((vcpu->kvm->arch.use_cmma) &&
2929 (vcpu->kvm->mm->context.use_cmma))
2930 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2931 goto retry;
2932 }
2933
2934 /* nothing to do, just clear the request */
2935 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2936
2937 return 0;
2938 }
2939
2940 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2941 const struct kvm_s390_vm_tod_clock *gtod)
2942 {
2943 struct kvm_vcpu *vcpu;
2944 struct kvm_s390_tod_clock_ext htod;
2945 int i;
2946
2947 mutex_lock(&kvm->lock);
2948 preempt_disable();
2949
2950 get_tod_clock_ext((char *)&htod);
2951
2952 kvm->arch.epoch = gtod->tod - htod.tod;
2953 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2954
2955 if (kvm->arch.epoch > gtod->tod)
2956 kvm->arch.epdx -= 1;
2957
2958 kvm_s390_vcpu_block_all(kvm);
2959 kvm_for_each_vcpu(i, vcpu, kvm) {
2960 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2961 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
2962 }
2963
2964 kvm_s390_vcpu_unblock_all(kvm);
2965 preempt_enable();
2966 mutex_unlock(&kvm->lock);
2967 }
2968
2969 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2970 {
2971 struct kvm_vcpu *vcpu;
2972 int i;
2973
2974 mutex_lock(&kvm->lock);
2975 preempt_disable();
2976 kvm->arch.epoch = tod - get_tod_clock();
2977 kvm_s390_vcpu_block_all(kvm);
2978 kvm_for_each_vcpu(i, vcpu, kvm)
2979 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2980 kvm_s390_vcpu_unblock_all(kvm);
2981 preempt_enable();
2982 mutex_unlock(&kvm->lock);
2983 }
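/*
 * Worked example (illustrative): with a host TOD of 0x1000 and a requested
 * guest TOD of 0x1800, the stored epoch is 0x800; SIE then presents
 * host_tod + epoch == 0x1800 to the guest. The extended variant above does
 * the same arithmetic with a borrow into the epoch index (epdx).
 */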
2984
2985 /**
2986 * kvm_arch_fault_in_page - fault-in guest page if necessary
2987 * @vcpu: The corresponding virtual cpu
2988 * @gpa: Guest physical address
2989 * @writable: Whether the page should be writable or not
2990 *
2991 * Make sure that a guest page has been faulted-in on the host.
2992 *
2993 * Return: Zero on success, negative error code otherwise.
2994 */
2995 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2996 {
2997 return gmap_fault(vcpu->arch.gmap, gpa,
2998 writable ? FAULT_FLAG_WRITE : 0);
2999 }
3000
3001 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3002 unsigned long token)
3003 {
3004 struct kvm_s390_interrupt inti;
3005 struct kvm_s390_irq irq;
3006
3007 if (start_token) {
3008 irq.u.ext.ext_params2 = token;
3009 irq.type = KVM_S390_INT_PFAULT_INIT;
3010 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3011 } else {
3012 inti.type = KVM_S390_INT_PFAULT_DONE;
3013 inti.parm64 = token;
3014 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3015 }
3016 }
3017
3018 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3019 struct kvm_async_pf *work)
3020 {
3021 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3022 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3023 }
3024
3025 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3026 struct kvm_async_pf *work)
3027 {
3028 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3029 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3030 }
3031
3032 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3033 struct kvm_async_pf *work)
3034 {
3035 /* s390 will always inject the page directly */
3036 }
3037
3038 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3039 {
3040 /*
3041 * s390 will always inject the page directly,
3042 * but we still want check_async_completion to clean up
3043 */
3044 return true;
3045 }
3046
3047 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3048 {
3049 hva_t hva;
3050 struct kvm_arch_async_pf arch;
3051 int rc;
3052
3053 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3054 return 0;
3055 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3056 vcpu->arch.pfault_compare)
3057 return 0;
3058 if (psw_extint_disabled(vcpu))
3059 return 0;
3060 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3061 return 0;
3062 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3063 return 0;
3064 if (!vcpu->arch.gmap->pfault_enabled)
3065 return 0;
3066
3067 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3068 hva += current->thread.gmap_addr & ~PAGE_MASK;
3069 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3070 return 0;
3071
3072 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3073 return rc;
3074 }
3075
3076 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3077 {
3078 int rc, cpuflags;
3079
3080 /*
3081 * On s390, notifications for arriving pages are delivered directly
3082 * to the guest, but the housekeeping for completed pfaults is
3083 * handled outside the worker.
3084 */
3085 kvm_check_async_pf_completion(vcpu);
3086
3087 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3088 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3089
3090 if (need_resched())
3091 schedule();
3092
3093 if (test_cpu_flag(CIF_MCCK_PENDING))
3094 s390_handle_mcck();
3095
3096 if (!kvm_is_ucontrol(vcpu->kvm)) {
3097 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3098 if (rc)
3099 return rc;
3100 }
3101
3102 rc = kvm_s390_handle_requests(vcpu);
3103 if (rc)
3104 return rc;
3105
3106 if (guestdbg_enabled(vcpu)) {
3107 kvm_s390_backup_guest_per_regs(vcpu);
3108 kvm_s390_patch_guest_per_regs(vcpu);
3109 }
3110
3111 vcpu->arch.sie_block->icptcode = 0;
3112 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3113 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3114 trace_kvm_s390_sie_enter(vcpu, cpuflags);
3115
3116 return 0;
3117 }
3118
3119 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3120 {
3121 struct kvm_s390_pgm_info pgm_info = {
3122 .code = PGM_ADDRESSING,
3123 };
3124 u8 opcode, ilen;
3125 int rc;
3126
3127 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3128 trace_kvm_s390_sie_fault(vcpu);
3129
3130 /*
3131 * We want to inject an addressing exception, which is defined as a
3132 * suppressing or terminating exception. However, since we came here
3133 * by a DAT access exception, the PSW still points to the faulting
3134 * instruction since DAT exceptions are nullifying. So we've got
3135 * to look up the current opcode to get the length of the instruction
3136 * to be able to forward the PSW.
3137 */
3138 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3139 ilen = insn_length(opcode);
3140 if (rc < 0) {
3141 return rc;
3142 } else if (rc) {
3143 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3144 * Forward by an arbitrary ilc; injection will take care of
3145 * nullification if necessary.
3146 */
3147 pgm_info = vcpu->arch.pgm;
3148 ilen = 4;
3149 }
3150 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3151 kvm_s390_forward_psw(vcpu, ilen);
3152 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3153 }
3154
3155 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3156 {
3157 struct mcck_volatile_info *mcck_info;
3158 struct sie_page *sie_page;
3159
3160 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3161 vcpu->arch.sie_block->icptcode);
3162 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3163
3164 if (guestdbg_enabled(vcpu))
3165 kvm_s390_restore_guest_per_regs(vcpu);
3166
3167 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3168 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3169
3170 if (exit_reason == -EINTR) {
3171 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3172 sie_page = container_of(vcpu->arch.sie_block,
3173 struct sie_page, sie_block);
3174 mcck_info = &sie_page->mcck_info;
3175 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3176 return 0;
3177 }
3178
3179 if (vcpu->arch.sie_block->icptcode > 0) {
3180 int rc = kvm_handle_sie_intercept(vcpu);
3181
3182 if (rc != -EOPNOTSUPP)
3183 return rc;
3184 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3185 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3186 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3187 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3188 return -EREMOTE;
3189 } else if (exit_reason != -EFAULT) {
3190 vcpu->stat.exit_null++;
3191 return 0;
3192 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3193 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3194 vcpu->run->s390_ucontrol.trans_exc_code =
3195 current->thread.gmap_addr;
3196 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3197 return -EREMOTE;
3198 } else if (current->thread.gmap_pfault) {
3199 trace_kvm_s390_major_guest_pfault(vcpu);
3200 current->thread.gmap_pfault = 0;
3201 if (kvm_arch_setup_async_pf(vcpu))
3202 return 0;
3203 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3204 }
3205 return vcpu_post_run_fault_in_sie(vcpu);
3206 }
3207
3208 static int __vcpu_run(struct kvm_vcpu *vcpu)
3209 {
3210 int rc, exit_reason;
3211
3212 /*
3213 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3214 * ning the guest), so that memslots (and other data) are protected.
3215 */
3216 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3217
3218 do {
3219 rc = vcpu_pre_run(vcpu);
3220 if (rc)
3221 break;
3222
3223 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3224 /*
3225 * As PF_VCPU will be used in the fault handler, there must be
3226 * no uaccess between guest_enter and guest_exit.
3227 */
3228 local_irq_disable();
3229 guest_enter_irqoff();
3230 __disable_cpu_timer_accounting(vcpu);
3231 local_irq_enable();
3232 exit_reason = sie64a(vcpu->arch.sie_block,
3233 vcpu->run->s.regs.gprs);
3234 local_irq_disable();
3235 __enable_cpu_timer_accounting(vcpu);
3236 guest_exit_irqoff();
3237 local_irq_enable();
3238 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3239
3240 rc = vcpu_post_run(vcpu, exit_reason);
3241 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3242
3243 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3244 return rc;
3245 }
3246
3247 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3248 {
3249 struct runtime_instr_cb *riccb;
3250 struct gs_cb *gscb;
3251
3252 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3253 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3254 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3255 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3256 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3257 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3258 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3259 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3260 /* some control register changes require a tlb flush */
3261 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3262 }
3263 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3264 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3265 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3266 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3267 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3268 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3269 }
3270 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3271 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3272 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3273 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3274 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3275 kvm_clear_async_pf_completion_queue(vcpu);
3276 }
3277 /*
3278 * If userspace sets the riccb (e.g. after migration) to a valid state,
3279 * we should enable RI here instead of doing the lazy enablement.
3280 */
3281 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3282 test_kvm_facility(vcpu->kvm, 64) &&
3283 riccb->v &&
3284 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3285 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3286 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3287 }
3288 /*
3289 * If userspace sets the gscb (e.g. after migration) to non-zero,
3290 * we should enable GS here instead of doing the lazy enablement.
3291 */
3292 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3293 test_kvm_facility(vcpu->kvm, 133) &&
3294 gscb->gssm &&
3295 !vcpu->arch.gs_enabled) {
3296 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3297 vcpu->arch.sie_block->ecb |= ECB_GS;
3298 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3299 vcpu->arch.gs_enabled = 1;
3300 }
3301 save_access_regs(vcpu->arch.host_acrs);
3302 restore_access_regs(vcpu->run->s.regs.acrs);
3303 /* save host (userspace) fprs/vrs */
3304 save_fpu_regs();
3305 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3306 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3307 if (MACHINE_HAS_VX)
3308 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3309 else
3310 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3311 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3312 if (test_fp_ctl(current->thread.fpu.fpc))
3313 /* User space provided an invalid FPC, let's clear it */
3314 current->thread.fpu.fpc = 0;
3315 if (MACHINE_HAS_GS) {
3316 preempt_disable();
3317 __ctl_set_bit(2, 4);
3318 if (current->thread.gs_cb) {
3319 vcpu->arch.host_gscb = current->thread.gs_cb;
3320 save_gs_cb(vcpu->arch.host_gscb);
3321 }
3322 if (vcpu->arch.gs_enabled) {
3323 current->thread.gs_cb = (struct gs_cb *)
3324 &vcpu->run->s.regs.gscb;
3325 restore_gs_cb(current->thread.gs_cb);
3326 }
3327 preempt_enable();
3328 }
3329
3330 kvm_run->kvm_dirty_regs = 0;
3331 }
3332
3333 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3334 {
3335 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3336 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3337 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3338 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3339 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3340 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3341 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3342 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3343 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3344 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3345 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3346 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3347 save_access_regs(vcpu->run->s.regs.acrs);
3348 restore_access_regs(vcpu->arch.host_acrs);
3349 /* Save guest register state */
3350 save_fpu_regs();
3351 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3352 /* Restore will be done lazily at return */
3353 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3354 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3355 if (MACHINE_HAS_GS) {
3356 __ctl_set_bit(2, 4);
3357 if (vcpu->arch.gs_enabled)
3358 save_gs_cb(current->thread.gs_cb);
3359 preempt_disable();
3360 current->thread.gs_cb = vcpu->arch.host_gscb;
3361 restore_gs_cb(vcpu->arch.host_gscb);
3362 preempt_enable();
3363 if (!vcpu->arch.host_gscb)
3364 __ctl_clear_bit(2, 4);
3365 vcpu->arch.host_gscb = NULL;
3366 }
3367
3368 }
3369
3370 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3371 {
3372 int rc;
3373
3374 if (kvm_run->immediate_exit)
3375 return -EINTR;
3376
3377 if (guestdbg_exit_pending(vcpu)) {
3378 kvm_s390_prepare_debug_exit(vcpu);
3379 return 0;
3380 }
3381
3382 kvm_sigset_activate(vcpu);
3383
3384 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3385 kvm_s390_vcpu_start(vcpu);
3386 } else if (is_vcpu_stopped(vcpu)) {
3387 pr_err_ratelimited("can't run stopped vcpu %d\n",
3388 vcpu->vcpu_id);
3389 return -EINVAL;
3390 }
3391
3392 sync_regs(vcpu, kvm_run);
3393 enable_cpu_timer_accounting(vcpu);
3394
3395 might_fault();
3396 rc = __vcpu_run(vcpu);
3397
3398 if (signal_pending(current) && !rc) {
3399 kvm_run->exit_reason = KVM_EXIT_INTR;
3400 rc = -EINTR;
3401 }
3402
3403 if (guestdbg_exit_pending(vcpu) && !rc) {
3404 kvm_s390_prepare_debug_exit(vcpu);
3405 rc = 0;
3406 }
3407
3408 if (rc == -EREMOTE) {
3409 /* userspace support is needed; kvm_run has been prepared */
3410 rc = 0;
3411 }
3412
3413 disable_cpu_timer_accounting(vcpu);
3414 store_regs(vcpu, kvm_run);
3415
3416 kvm_sigset_deactivate(vcpu);
3417
3418 vcpu->stat.exit_userspace++;
3419 return rc;
3420 }
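/*
 * Illustrative userspace counterpart (assumed "vcpu_fd" and mmap'ed "run",
 * see the sketch after kvm_arch_vcpu_create above): the ioctl handled here
 * is typically driven in a loop.
 *
 *	for (;;) {
 *		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
 *			err(1, "KVM_RUN");
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			handle_sieic(run);	// assumed helper
 *	}
 */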
3421
3422 /*
3423 * store status at address
3424 * we have two special cases:
3425 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3426 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3427 */
3428 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3429 {
3430 unsigned char archmode = 1;
3431 freg_t fprs[NUM_FPRS];
3432 unsigned int px;
3433 u64 clkcomp, cputm;
3434 int rc;
3435
3436 px = kvm_s390_get_prefix(vcpu);
3437 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3438 if (write_guest_abs(vcpu, 163, &archmode, 1))
3439 return -EFAULT;
3440 gpa = 0;
3441 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3442 if (write_guest_real(vcpu, 163, &archmode, 1))
3443 return -EFAULT;
3444 gpa = px;
3445 } else
3446 gpa -= __LC_FPREGS_SAVE_AREA;
3447
3448 /* manually convert vector registers if necessary */
3449 if (MACHINE_HAS_VX) {
3450 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3451 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3452 fprs, 128);
3453 } else {
3454 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3455 vcpu->run->s.regs.fprs, 128);
3456 }
3457 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3458 vcpu->run->s.regs.gprs, 128);
3459 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3460 &vcpu->arch.sie_block->gpsw, 16);
3461 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3462 &px, 4);
3463 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3464 &vcpu->run->s.regs.fpc, 4);
3465 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3466 &vcpu->arch.sie_block->todpr, 4);
3467 cputm = kvm_s390_get_cpu_timer(vcpu);
3468 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3469 &cputm, 8);
3470 clkcomp = vcpu->arch.sie_block->ckc >> 8;
3471 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3472 &clkcomp, 8);
3473 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3474 &vcpu->run->s.regs.acrs, 64);
3475 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3476 &vcpu->arch.sie_block->gcr, 128);
3477 return rc ? -EFAULT : 0;
3478 }
3479
3480 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3481 {
3482 /*
3483 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3484 * switch in the run ioctl. Let's update our copies before we save
3485 * them into the save area.
3486 */
3487 save_fpu_regs();
3488 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3489 save_access_regs(vcpu->run->s.regs.acrs);
3490
3491 return kvm_s390_store_status_unloaded(vcpu, addr);
3492 }
3493
3494 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3495 {
3496 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3497 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3498 }
3499
3500 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3501 {
3502 unsigned int i;
3503 struct kvm_vcpu *vcpu;
3504
3505 kvm_for_each_vcpu(i, vcpu, kvm) {
3506 __disable_ibs_on_vcpu(vcpu);
3507 }
3508 }
3509
3510 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3511 {
3512 if (!sclp.has_ibs)
3513 return;
3514 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3515 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3516 }
3517
3518 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3519 {
3520 int i, online_vcpus, started_vcpus = 0;
3521
3522 if (!is_vcpu_stopped(vcpu))
3523 return;
3524
3525 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3526 /* Only one cpu at a time may enter/leave the STOPPED state. */
3527 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3528 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3529
3530 for (i = 0; i < online_vcpus; i++) {
3531 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3532 started_vcpus++;
3533 }
3534
3535 if (started_vcpus == 0) {
3536 /* we're the only active VCPU -> speed it up */
3537 __enable_ibs_on_vcpu(vcpu);
3538 } else if (started_vcpus == 1) {
3539 /*
3540 * As we are starting a second VCPU, we have to disable
3541 * the IBS facility on all VCPUs to remove potentially
3542 * outstanding ENABLE requests.
3543 */
3544 __disable_ibs_on_all_vcpus(vcpu->kvm);
3545 }
3546
3547 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3548 /*
3549 * Another VCPU might have used IBS while we were offline.
3550 * Let's play safe and flush the VCPU at startup.
3551 */
3552 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3553 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3554 return;
3555 }
3556
3557 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3558 {
3559 int i, online_vcpus, started_vcpus = 0;
3560 struct kvm_vcpu *started_vcpu = NULL;
3561
3562 if (is_vcpu_stopped(vcpu))
3563 return;
3564
3565 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3566 /* Only one cpu at a time may enter/leave the STOPPED state. */
3567 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3568 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3569
3570 /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3571 kvm_s390_clear_stop_irq(vcpu);
3572
3573 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3574 __disable_ibs_on_vcpu(vcpu);
3575
3576 for (i = 0; i < online_vcpus; i++) {
3577 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3578 started_vcpus++;
3579 started_vcpu = vcpu->kvm->vcpus[i];
3580 }
3581 }
3582
3583 if (started_vcpus == 1) {
3584 /*
3585 * As we only have one VCPU left, we want to enable the
3586 * IBS facility for that VCPU to speed it up.
3587 */
3588 __enable_ibs_on_vcpu(started_vcpu);
3589 }
3590
3591 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3592 return;
3593 }
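/*
 * Worked example (illustrative): with VCPUs A and B running, stopping A
 * leaves started_vcpus == 1 and started_vcpu == B, so IBS is re-enabled
 * for B. Starting A again later hits the started_vcpus == 1 branch in
 * kvm_s390_vcpu_start() and first disables IBS on all VCPUs.
 */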
3594
3595 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3596 struct kvm_enable_cap *cap)
3597 {
3598 int r;
3599
3600 if (cap->flags)
3601 return -EINVAL;
3602
3603 switch (cap->cap) {
3604 case KVM_CAP_S390_CSS_SUPPORT:
3605 if (!vcpu->kvm->arch.css_support) {
3606 vcpu->kvm->arch.css_support = 1;
3607 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3608 trace_kvm_s390_enable_css(vcpu->kvm);
3609 }
3610 r = 0;
3611 break;
3612 default:
3613 r = -EINVAL;
3614 break;
3615 }
3616 return r;
3617 }
3618
3619 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3620 struct kvm_s390_mem_op *mop)
3621 {
3622 void __user *uaddr = (void __user *)mop->buf;
3623 void *tmpbuf = NULL;
3624 int r, srcu_idx;
3625 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3626 | KVM_S390_MEMOP_F_CHECK_ONLY;
3627
3628 if (mop->flags & ~supported_flags)
3629 return -EINVAL;
3630
3631 if (mop->size > MEM_OP_MAX_SIZE)
3632 return -E2BIG;
3633
3634 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3635 tmpbuf = vmalloc(mop->size);
3636 if (!tmpbuf)
3637 return -ENOMEM;
3638 }
3639
3640 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3641
3642 switch (mop->op) {
3643 case KVM_S390_MEMOP_LOGICAL_READ:
3644 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3645 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3646 mop->size, GACC_FETCH);
3647 break;
3648 }
3649 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3650 if (r == 0) {
3651 if (copy_to_user(uaddr, tmpbuf, mop->size))
3652 r = -EFAULT;
3653 }
3654 break;
3655 case KVM_S390_MEMOP_LOGICAL_WRITE:
3656 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3657 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3658 mop->size, GACC_STORE);
3659 break;
3660 }
3661 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3662 r = -EFAULT;
3663 break;
3664 }
3665 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3666 break;
3667 default:
3668 r = -EINVAL;
3669 }
3670
3671 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3672
3673 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3674 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3675
3676 vfree(tmpbuf);
3677 return r;
3678 }
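/*
 * Illustrative userspace sketch (assumed "vcpu_fd"; address and size are
 * made up): a logical read through the ioctl above.
 *
 *	__u8 buf[256];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,
 *		.size  = sizeof(buf),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)buf,
 *		.ar    = 0,
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		err(1, "KVM_S390_MEM_OP");
 */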
3679
3680 long kvm_arch_vcpu_ioctl(struct file *filp,
3681 unsigned int ioctl, unsigned long arg)
3682 {
3683 struct kvm_vcpu *vcpu = filp->private_data;
3684 void __user *argp = (void __user *)arg;
3685 int idx;
3686 long r;
3687
3688 switch (ioctl) {
3689 case KVM_S390_IRQ: {
3690 struct kvm_s390_irq s390irq;
3691
3692 r = -EFAULT;
3693 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3694 break;
3695 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3696 break;
3697 }
3698 case KVM_S390_INTERRUPT: {
3699 struct kvm_s390_interrupt s390int;
3700 struct kvm_s390_irq s390irq;
3701
3702 r = -EFAULT;
3703 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3704 break;
3705 if (s390int_to_s390irq(&s390int, &s390irq))
3706 return -EINVAL;
3707 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3708 break;
3709 }
3710 case KVM_S390_STORE_STATUS:
3711 idx = srcu_read_lock(&vcpu->kvm->srcu);
3712 r = kvm_s390_vcpu_store_status(vcpu, arg);
3713 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3714 break;
3715 case KVM_S390_SET_INITIAL_PSW: {
3716 psw_t psw;
3717
3718 r = -EFAULT;
3719 if (copy_from_user(&psw, argp, sizeof(psw)))
3720 break;
3721 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3722 break;
3723 }
3724 case KVM_S390_INITIAL_RESET:
3725 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3726 break;
3727 case KVM_SET_ONE_REG:
3728 case KVM_GET_ONE_REG: {
3729 struct kvm_one_reg reg;
3730 r = -EFAULT;
3731 if (copy_from_user(&reg, argp, sizeof(reg)))
3732 break;
3733 if (ioctl == KVM_SET_ONE_REG)
3734 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3735 else
3736 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3737 break;
3738 }
3739 #ifdef CONFIG_KVM_S390_UCONTROL
3740 case KVM_S390_UCAS_MAP: {
3741 struct kvm_s390_ucas_mapping ucasmap;
3742
3743 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3744 r = -EFAULT;
3745 break;
3746 }
3747
3748 if (!kvm_is_ucontrol(vcpu->kvm)) {
3749 r = -EINVAL;
3750 break;
3751 }
3752
3753 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3754 ucasmap.vcpu_addr, ucasmap.length);
3755 break;
3756 }
3757 case KVM_S390_UCAS_UNMAP: {
3758 struct kvm_s390_ucas_mapping ucasmap;
3759
3760 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3761 r = -EFAULT;
3762 break;
3763 }
3764
3765 if (!kvm_is_ucontrol(vcpu->kvm)) {
3766 r = -EINVAL;
3767 break;
3768 }
3769
3770 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3771 ucasmap.length);
3772 break;
3773 }
3774 #endif
3775 case KVM_S390_VCPU_FAULT: {
3776 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3777 break;
3778 }
3779 case KVM_ENABLE_CAP:
3780 {
3781 struct kvm_enable_cap cap;
3782 r = -EFAULT;
3783 if (copy_from_user(&cap, argp, sizeof(cap)))
3784 break;
3785 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3786 break;
3787 }
3788 case KVM_S390_MEM_OP: {
3789 struct kvm_s390_mem_op mem_op;
3790
3791 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3792 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3793 else
3794 r = -EFAULT;
3795 break;
3796 }
3797 case KVM_S390_SET_IRQ_STATE: {
3798 struct kvm_s390_irq_state irq_state;
3799
3800 r = -EFAULT;
3801 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3802 break;
3803 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3804 irq_state.len == 0 ||
3805 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3806 r = -EINVAL;
3807 break;
3808 }
3809 /* do not use irq_state.flags; it will break old QEMUs */
3810 r = kvm_s390_set_irq_state(vcpu,
3811 (void __user *) irq_state.buf,
3812 irq_state.len);
3813 break;
3814 }
3815 case KVM_S390_GET_IRQ_STATE: {
3816 struct kvm_s390_irq_state irq_state;
3817
3818 r = -EFAULT;
3819 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3820 break;
3821 if (irq_state.len == 0) {
3822 r = -EINVAL;
3823 break;
3824 }
3825 /* do not use irq_state.flags, it will break old QEMUs */
3826 r = kvm_s390_get_irq_state(vcpu,
3827 (__u8 __user *) irq_state.buf,
3828 irq_state.len);
3829 break;
3830 }
3831 default:
3832 r = -ENOTTY;
3833 }
3834 return r;
3835 }
3836
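/*
 * Back a vma fault on the vCPU file mapping. Only ucontrol VMs expose
 * anything here: mapping page KVM_S390_SIE_PAGE_OFFSET hands userspace
 * the vCPU's SIE control block; everything else raises SIGBUS.
 */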
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
        if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
            && (kvm_is_ucontrol(vcpu->kvm))) {
                vmf->page = virt_to_page(vcpu->arch.sie_block);
                get_page(vmf->page);
                return 0;
        }
#endif
        return VM_FAULT_SIGBUS;
}

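/*
 * s390 keeps no arch-specific per-memslot state, so there is nothing
 * to allocate here.
 */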
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
                            unsigned long npages)
{
        return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                   struct kvm_memory_slot *memslot,
                                   const struct kvm_userspace_memory_region *mem,
                                   enum kvm_mr_change change)
{
        /*
         * A few sanity checks. Memory slots have to start and end on a
         * segment boundary (1 MB), which is what the 0xffffful masks
         * below enforce. The backing memory in userland may be
         * fragmented across several vmas, and it is fine to mmap() and
         * munmap() within this slot at any time after this call.
         */

        if (mem->userspace_addr & 0xffffful)
                return -EINVAL;

        if (mem->memory_size & 0xffffful)
                return -EINVAL;

        if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
                return -EINVAL;

        return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
                                   const struct kvm_userspace_memory_region *mem,
                                   const struct kvm_memory_slot *old,
                                   const struct kvm_memory_slot *new,
                                   enum kvm_mr_change change)
{
        int rc;

        /* If the basics of the memslot do not change, we do not want
         * to update the gmap. Every update causes several unnecessary
         * segment translation exceptions. This is usually handled just
         * fine by the normal fault handler + gmap, but it will also
         * cause faults on the prefix page of running guest CPUs.
         */
        if (old->userspace_addr == mem->userspace_addr &&
            old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
            old->npages * PAGE_SIZE == mem->memory_size)
                return;

        rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
                              mem->guest_phys_addr, mem->memory_size);
        if (rc)
                pr_warn("failed to commit memory region\n");
        return;
}

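/*
 * Build a mask for doubleword i of the facility list from the i-th
 * 2-bit field of sclp.hmfai (counted from the most significant bits):
 * each increment of the field drops another 16 low-order facility
 * bits, so 0 keeps the low 48 bits and 3 keeps none. As the name
 * suggests, the surviving bits are the facilities that are not
 * hypervisor-managed and may be reported to guests.
 */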
static inline unsigned long nonhyp_mask(int i)
{
        unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

        return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

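/* Leaving blocked state: any recorded wakeup is no longer valid. */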
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
        vcpu->valid_wakeup = false;
}

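/*
 * Module init: refuse to load without the SIE interpretation facility
 * (sief2), then extend the static facility mask offered to guests with
 * the host's own facility bits, filtered through nonhyp_mask() above.
 */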
static int __init kvm_s390_init(void)
{
        int i;

        if (!sclp.has_sief2) {
                pr_info("SIE not available\n");
                return -ENODEV;
        }

        for (i = 0; i < 16; i++)
                kvm_s390_fac_list_mask[i] |=
                        S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

        return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
        kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");