arch/s390/kvm/kvm-s390.c (mirror_ubuntu-bionic-kernel.git)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * hosting IBM Z kernel virtual machines (s390x)
4 *
5 * Copyright IBM Corp. 2008, 2017
6 *
7 * Author(s): Carsten Otte <cotte@de.ibm.com>
8 * Christian Borntraeger <borntraeger@de.ibm.com>
9 * Heiko Carstens <heiko.carstens@de.ibm.com>
10 * Christian Ehrhardt <ehrhardt@de.ibm.com>
11 * Jason J. Herne <jjherne@us.ibm.com>
12 */
13
14 #include <linux/compiler.h>
15 #include <linux/err.h>
16 #include <linux/fs.h>
17 #include <linux/hrtimer.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/mman.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/random.h>
25 #include <linux/slab.h>
26 #include <linux/timer.h>
27 #include <linux/vmalloc.h>
28 #include <linux/bitmap.h>
29 #include <linux/sched/signal.h>
30 #include <linux/string.h>
31
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
34 #include <asm/stp.h>
35 #include <asm/pgtable.h>
36 #include <asm/gmap.h>
37 #include <asm/nmi.h>
38 #include <asm/switch_to.h>
39 #include <asm/isc.h>
40 #include <asm/sclp.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
43 #include "kvm-s390.h"
44 #include "gaccess.h"
45
46 #define KMSG_COMPONENT "kvm-s390"
47 #undef pr_fmt
48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
49
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53
54 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57 (KVM_MAX_VCPUS + LOCAL_IRQS))
58
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60
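/*
 * debugfs statistics: each entry maps a debugfs file name to the offset of
 * a per-vcpu counter in struct kvm_vcpu, via the VCPU_STAT() helper above.
 */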
61 struct kvm_stats_debugfs_item debugfs_entries[] = {
62 { "userspace_handled", VCPU_STAT(exit_userspace) },
63 { "exit_null", VCPU_STAT(exit_null) },
64 { "exit_validity", VCPU_STAT(exit_validity) },
65 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
66 { "exit_external_request", VCPU_STAT(exit_external_request) },
67 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
68 { "exit_instruction", VCPU_STAT(exit_instruction) },
69 { "exit_pei", VCPU_STAT(exit_pei) },
70 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
71 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
72 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
73 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
74 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
75 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
76 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
77 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
78 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
79 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
80 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
81 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
82 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
83 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
84 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
85 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
86 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
87 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
88 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
89 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
90 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
91 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
92 { "instruction_spx", VCPU_STAT(instruction_spx) },
93 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
94 { "instruction_stap", VCPU_STAT(instruction_stap) },
95 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
96 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
97 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
98 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
99 { "instruction_essa", VCPU_STAT(instruction_essa) },
100 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
101 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
102 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
103 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
104 { "instruction_sie", VCPU_STAT(instruction_sie) },
105 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
106 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
107 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
108 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
109 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
110 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
111 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
112 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
113 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
114 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
115 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
116 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
117 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
118 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
119 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
120 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
121 { "diagnose_10", VCPU_STAT(diagnose_10) },
122 { "diagnose_44", VCPU_STAT(diagnose_44) },
123 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
124 { "diagnose_258", VCPU_STAT(diagnose_258) },
125 { "diagnose_308", VCPU_STAT(diagnose_308) },
126 { "diagnose_500", VCPU_STAT(diagnose_500) },
127 { NULL }
128 };
129
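/*
 * 128-bit TOD clock value as stored by get_tod_clock_ext(): the epoch index
 * byte, the 64-bit TOD clock, and seven reserved bytes.
 */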
130 struct kvm_s390_tod_clock_ext {
131 __u8 epoch_idx;
132 __u64 tod;
133 __u8 reserved[7];
134 } __packed;
135
136 /* allow nested virtualization in KVM (if enabled by user space) */
137 static int nested;
138 module_param(nested, int, S_IRUGO);
139 MODULE_PARM_DESC(nested, "Nested virtualization support");
140
141 /* upper facilities limit for kvm */
142 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
143
144 unsigned long kvm_s390_fac_list_mask_size(void)
145 {
146 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
147 return ARRAY_SIZE(kvm_s390_fac_list_mask);
148 }
149
150 /* available cpu features supported by kvm */
151 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
152 /* available subfunctions indicated via query / "test bit" */
153 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
154
155 static struct gmap_notifier gmap_notifier;
156 static struct gmap_notifier vsie_gmap_notifier;
157 debug_info_t *kvm_s390_dbf;
158
159 /* Section: not file related */
160 int kvm_arch_hardware_enable(void)
161 {
162 /* every s390 is virtualization enabled ;-) */
163 return 0;
164 }
165
166 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
167 unsigned long end);
168
169 /*
170 * This callback is executed during stop_machine(). All CPUs are therefore
171 * temporarily stopped. In order not to change guest behavior, we have to
172 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
173 * so a CPU won't be stopped while calculating with the epoch.
174 */
175 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
176 void *v)
177 {
178 struct kvm *kvm;
179 struct kvm_vcpu *vcpu;
180 int i;
181 unsigned long long *delta = v;
182
183 list_for_each_entry(kvm, &vm_list, vm_list) {
184 kvm->arch.epoch -= *delta;
185 kvm_for_each_vcpu(i, vcpu, kvm) {
186 vcpu->arch.sie_block->epoch -= *delta;
187 if (vcpu->arch.cputm_enabled)
188 vcpu->arch.cputm_start += *delta;
189 if (vcpu->arch.vsie_block)
190 vcpu->arch.vsie_block->epoch -= *delta;
191 }
192 }
193 return NOTIFY_OK;
194 }
195
196 static struct notifier_block kvm_clock_notifier = {
197 .notifier_call = kvm_clock_sync,
198 };
199
200 int kvm_arch_hardware_setup(void)
201 {
202 gmap_notifier.notifier_call = kvm_gmap_notifier;
203 gmap_register_pte_notifier(&gmap_notifier);
204 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
205 gmap_register_pte_notifier(&vsie_gmap_notifier);
206 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
207 &kvm_clock_notifier);
208 return 0;
209 }
210
211 void kvm_arch_hardware_unsetup(void)
212 {
213 gmap_unregister_pte_notifier(&gmap_notifier);
214 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
215 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
216 &kvm_clock_notifier);
217 }
218
219 static void allow_cpu_feat(unsigned long nr)
220 {
221 set_bit_inv(nr, kvm_s390_available_cpu_feat);
222 }
223
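/*
 * Probe a single PERFORM LOCKED OPERATION function code. The 0x100 bit in
 * r0 requests the "test bit" form of PLO, which only reports (condition
 * code 0) whether function code nr is installed.
 */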
224 static inline int plo_test_bit(unsigned char nr)
225 {
226 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
227 int cc;
228
229 asm volatile(
230 /* Parameter registers are ignored for "test bit" */
231 " plo 0,0,0,0(0)\n"
232 " ipm %0\n"
233 " srl %0,28\n"
234 : "=d" (cc)
235 : "d" (r0)
236 : "cc");
237 return cc == 0;
238 }
239
240 static void kvm_s390_cpu_feat_init(void)
241 {
242 int i;
243
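	/* probe all 256 PLO function codes; available ones are recorded MSB-first */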
244 for (i = 0; i < 256; ++i) {
245 if (plo_test_bit(i))
246 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
247 }
248
249 if (test_facility(28)) /* TOD-clock steering */
250 ptff(kvm_s390_available_subfunc.ptff,
251 sizeof(kvm_s390_available_subfunc.ptff),
252 PTFF_QAF);
253
254 if (test_facility(17)) { /* MSA */
255 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
256 kvm_s390_available_subfunc.kmac);
257 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
258 kvm_s390_available_subfunc.kmc);
259 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
260 kvm_s390_available_subfunc.km);
261 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
262 kvm_s390_available_subfunc.kimd);
263 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
264 kvm_s390_available_subfunc.klmd);
265 }
266 if (test_facility(76)) /* MSA3 */
267 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
268 kvm_s390_available_subfunc.pckmo);
269 if (test_facility(77)) { /* MSA4 */
270 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
271 kvm_s390_available_subfunc.kmctr);
272 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
273 kvm_s390_available_subfunc.kmf);
274 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
275 kvm_s390_available_subfunc.kmo);
276 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
277 kvm_s390_available_subfunc.pcc);
278 }
279 if (test_facility(57)) /* MSA5 */
280 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
281 kvm_s390_available_subfunc.ppno);
282
283 if (test_facility(146)) /* MSA8 */
284 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
285 kvm_s390_available_subfunc.kma);
286
287 if (MACHINE_HAS_ESOP)
288 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
289 /*
290 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
291 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
292 */
293 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
294 !test_facility(3) || !nested)
295 return;
296 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
297 if (sclp.has_64bscao)
298 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
299 if (sclp.has_siif)
300 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
301 if (sclp.has_gpere)
302 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
303 if (sclp.has_gsls)
304 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
305 if (sclp.has_ib)
306 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
307 if (sclp.has_cei)
308 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
309 if (sclp.has_ibs)
310 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
311 if (sclp.has_kss)
312 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
313 /*
314 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
315 * all skey handling functions read/set the skey from the PGSTE
316 * instead of the real storage key.
317 *
318	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
319	 * resident pages to be detected as preserved.
320 *
321 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
322 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
323 *
324 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
325 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
326 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
327 *
328 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
329 * cannot easily shadow the SCA because of the ipte lock.
330 */
331 }
332
333 int kvm_arch_init(void *opaque)
334 {
335 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
336 if (!kvm_s390_dbf)
337 return -ENOMEM;
338
339 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
340 debug_unregister(kvm_s390_dbf);
341 return -ENOMEM;
342 }
343
344 kvm_s390_cpu_feat_init();
345
346 /* Register floating interrupt controller interface. */
347 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
348 }
349
350 void kvm_arch_exit(void)
351 {
352 debug_unregister(kvm_s390_dbf);
353 }
354
355 /* Section: device related */
356 long kvm_arch_dev_ioctl(struct file *filp,
357 unsigned int ioctl, unsigned long arg)
358 {
359 if (ioctl == KVM_S390_ENABLE_SIE)
360 return s390_enable_sie();
361 return -EINVAL;
362 }
363
364 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
365 {
366 int r;
367
368 switch (ext) {
369 case KVM_CAP_S390_PSW:
370 case KVM_CAP_S390_GMAP:
371 case KVM_CAP_SYNC_MMU:
372 #ifdef CONFIG_KVM_S390_UCONTROL
373 case KVM_CAP_S390_UCONTROL:
374 #endif
375 case KVM_CAP_ASYNC_PF:
376 case KVM_CAP_SYNC_REGS:
377 case KVM_CAP_ONE_REG:
378 case KVM_CAP_ENABLE_CAP:
379 case KVM_CAP_S390_CSS_SUPPORT:
380 case KVM_CAP_IOEVENTFD:
381 case KVM_CAP_DEVICE_CTRL:
382 case KVM_CAP_ENABLE_CAP_VM:
383 case KVM_CAP_S390_IRQCHIP:
384 case KVM_CAP_VM_ATTRIBUTES:
385 case KVM_CAP_MP_STATE:
386 case KVM_CAP_IMMEDIATE_EXIT:
387 case KVM_CAP_S390_INJECT_IRQ:
388 case KVM_CAP_S390_USER_SIGP:
389 case KVM_CAP_S390_USER_STSI:
390 case KVM_CAP_S390_SKEYS:
391 case KVM_CAP_S390_IRQ_STATE:
392 case KVM_CAP_S390_USER_INSTR0:
393 case KVM_CAP_S390_CMMA_MIGRATION:
394 case KVM_CAP_S390_AIS:
395 case KVM_CAP_S390_AIS_MIGRATION:
396 r = 1;
397 break;
398 case KVM_CAP_S390_MEM_OP:
399 r = MEM_OP_MAX_SIZE;
400 break;
401 case KVM_CAP_NR_VCPUS:
402 case KVM_CAP_MAX_VCPUS:
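		/* the reported vcpu limit depends on the SCA format that can be used */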
403 r = KVM_S390_BSCA_CPU_SLOTS;
404 if (!kvm_s390_use_sca_entries())
405 r = KVM_MAX_VCPUS;
406 else if (sclp.has_esca && sclp.has_64bscao)
407 r = KVM_S390_ESCA_CPU_SLOTS;
408 break;
409 case KVM_CAP_NR_MEMSLOTS:
410 r = KVM_USER_MEM_SLOTS;
411 break;
412 case KVM_CAP_S390_COW:
413 r = MACHINE_HAS_ESOP;
414 break;
415 case KVM_CAP_S390_VECTOR_REGISTERS:
416 r = MACHINE_HAS_VX;
417 break;
418 case KVM_CAP_S390_RI:
419 r = test_facility(64);
420 break;
421 case KVM_CAP_S390_GS:
422 r = test_facility(133);
423 break;
424 case KVM_CAP_S390_BPB:
425 r = test_facility(82);
426 break;
427 default:
428 r = 0;
429 }
430 return r;
431 }
432
433 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
434 struct kvm_memory_slot *memslot)
435 {
436 gfn_t cur_gfn, last_gfn;
437 unsigned long address;
438 struct gmap *gmap = kvm->arch.gmap;
439
440 /* Loop over all guest pages */
441 last_gfn = memslot->base_gfn + memslot->npages;
442 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
443 address = gfn_to_hva_memslot(memslot, cur_gfn);
444
445 if (test_and_clear_guest_dirty(gmap->mm, address))
446 mark_page_dirty(kvm, cur_gfn);
447 if (fatal_signal_pending(current))
448 return;
449 cond_resched();
450 }
451 }
452
453 /* Section: vm related */
454 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
455
456 /*
457 * Get (and clear) the dirty memory log for a memory slot.
458 */
459 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
460 struct kvm_dirty_log *log)
461 {
462 int r;
463 unsigned long n;
464 struct kvm_memslots *slots;
465 struct kvm_memory_slot *memslot;
466 int is_dirty = 0;
467
468 if (kvm_is_ucontrol(kvm))
469 return -EINVAL;
470
471 mutex_lock(&kvm->slots_lock);
472
473 r = -EINVAL;
474 if (log->slot >= KVM_USER_MEM_SLOTS)
475 goto out;
476
477 slots = kvm_memslots(kvm);
478 memslot = id_to_memslot(slots, log->slot);
479 r = -ENOENT;
480 if (!memslot->dirty_bitmap)
481 goto out;
482
483 kvm_s390_sync_dirty_log(kvm, memslot);
484 r = kvm_get_dirty_log(kvm, log, &is_dirty);
485 if (r)
486 goto out;
487
488 /* Clear the dirty log */
489 if (is_dirty) {
490 n = kvm_dirty_bitmap_bytes(memslot);
491 memset(memslot->dirty_bitmap, 0, n);
492 }
493 r = 0;
494 out:
495 mutex_unlock(&kvm->slots_lock);
496 return r;
497 }
498
499 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
500 {
501 unsigned int i;
502 struct kvm_vcpu *vcpu;
503
504 kvm_for_each_vcpu(i, vcpu, kvm) {
505 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
506 }
507 }
508
509 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
510 {
511 int r;
512
513 if (cap->flags)
514 return -EINVAL;
515
516 switch (cap->cap) {
517 case KVM_CAP_S390_IRQCHIP:
518 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
519 kvm->arch.use_irqchip = 1;
520 r = 0;
521 break;
522 case KVM_CAP_S390_USER_SIGP:
523 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
524 kvm->arch.user_sigp = 1;
525 r = 0;
526 break;
527 case KVM_CAP_S390_VECTOR_REGISTERS:
528 mutex_lock(&kvm->lock);
529 if (kvm->created_vcpus) {
530 r = -EBUSY;
531 } else if (MACHINE_HAS_VX) {
532 set_kvm_facility(kvm->arch.model.fac_mask, 129);
533 set_kvm_facility(kvm->arch.model.fac_list, 129);
534 if (test_facility(134)) {
535 set_kvm_facility(kvm->arch.model.fac_mask, 134);
536 set_kvm_facility(kvm->arch.model.fac_list, 134);
537 }
538 if (test_facility(135)) {
539 set_kvm_facility(kvm->arch.model.fac_mask, 135);
540 set_kvm_facility(kvm->arch.model.fac_list, 135);
541 }
542 r = 0;
543 } else
544 r = -EINVAL;
545 mutex_unlock(&kvm->lock);
546 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
547 r ? "(not available)" : "(success)");
548 break;
549 case KVM_CAP_S390_RI:
550 r = -EINVAL;
551 mutex_lock(&kvm->lock);
552 if (kvm->created_vcpus) {
553 r = -EBUSY;
554 } else if (test_facility(64)) {
555 set_kvm_facility(kvm->arch.model.fac_mask, 64);
556 set_kvm_facility(kvm->arch.model.fac_list, 64);
557 r = 0;
558 }
559 mutex_unlock(&kvm->lock);
560 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
561 r ? "(not available)" : "(success)");
562 break;
563 case KVM_CAP_S390_AIS:
564 mutex_lock(&kvm->lock);
565 if (kvm->created_vcpus) {
566 r = -EBUSY;
567 } else {
568 set_kvm_facility(kvm->arch.model.fac_mask, 72);
569 set_kvm_facility(kvm->arch.model.fac_list, 72);
570 r = 0;
571 }
572 mutex_unlock(&kvm->lock);
573 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
574 r ? "(not available)" : "(success)");
575 break;
576 case KVM_CAP_S390_GS:
577 r = -EINVAL;
578 mutex_lock(&kvm->lock);
579 if (atomic_read(&kvm->online_vcpus)) {
580 r = -EBUSY;
581 } else if (test_facility(133)) {
582 set_kvm_facility(kvm->arch.model.fac_mask, 133);
583 set_kvm_facility(kvm->arch.model.fac_list, 133);
584 r = 0;
585 }
586 mutex_unlock(&kvm->lock);
587 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
588 r ? "(not available)" : "(success)");
589 break;
590 case KVM_CAP_S390_USER_STSI:
591 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
592 kvm->arch.user_stsi = 1;
593 r = 0;
594 break;
595 case KVM_CAP_S390_USER_INSTR0:
596 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
597 kvm->arch.user_instr0 = 1;
598 icpt_operexc_on_all_vcpus(kvm);
599 r = 0;
600 break;
601 default:
602 r = -EINVAL;
603 break;
604 }
605 return r;
606 }
607
608 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
609 {
610 int ret;
611
612 switch (attr->attr) {
613 case KVM_S390_VM_MEM_LIMIT_SIZE:
614 ret = 0;
615 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
616 kvm->arch.mem_limit);
617 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
618 ret = -EFAULT;
619 break;
620 default:
621 ret = -ENXIO;
622 break;
623 }
624 return ret;
625 }
626
627 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
628 {
629 int ret;
630 unsigned int idx;
631 switch (attr->attr) {
632 case KVM_S390_VM_MEM_ENABLE_CMMA:
633 ret = -ENXIO;
634 if (!sclp.has_cmma)
635 break;
636
637 ret = -EBUSY;
638 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
639 mutex_lock(&kvm->lock);
640 if (!kvm->created_vcpus) {
641 kvm->arch.use_cmma = 1;
642 ret = 0;
643 }
644 mutex_unlock(&kvm->lock);
645 break;
646 case KVM_S390_VM_MEM_CLR_CMMA:
647 ret = -ENXIO;
648 if (!sclp.has_cmma)
649 break;
650 ret = -EINVAL;
651 if (!kvm->arch.use_cmma)
652 break;
653
654 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
655 mutex_lock(&kvm->lock);
656 idx = srcu_read_lock(&kvm->srcu);
657 s390_reset_cmma(kvm->arch.gmap->mm);
658 srcu_read_unlock(&kvm->srcu, idx);
659 mutex_unlock(&kvm->lock);
660 ret = 0;
661 break;
662 case KVM_S390_VM_MEM_LIMIT_SIZE: {
663 unsigned long new_limit;
664
665 if (kvm_is_ucontrol(kvm))
666 return -EINVAL;
667
668 if (get_user(new_limit, (u64 __user *)attr->addr))
669 return -EFAULT;
670
671 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
672 new_limit > kvm->arch.mem_limit)
673 return -E2BIG;
674
675 if (!new_limit)
676 return -EINVAL;
677
678 /* gmap_create takes last usable address */
679 if (new_limit != KVM_S390_NO_MEM_LIMIT)
680 new_limit -= 1;
681
682 ret = -EBUSY;
683 mutex_lock(&kvm->lock);
684 if (!kvm->created_vcpus) {
685 /* gmap_create will round the limit up */
686 struct gmap *new = gmap_create(current->mm, new_limit);
687
688 if (!new) {
689 ret = -ENOMEM;
690 } else {
691 gmap_remove(kvm->arch.gmap);
692 new->private = kvm;
693 kvm->arch.gmap = new;
694 ret = 0;
695 }
696 }
697 mutex_unlock(&kvm->lock);
698 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
699 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
700 (void *) kvm->arch.gmap->asce);
701 break;
702 }
703 default:
704 ret = -ENXIO;
705 break;
706 }
707 return ret;
708 }
709
710 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
711
712 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
713 {
714 struct kvm_vcpu *vcpu;
715 int i;
716
717 if (!test_kvm_facility(kvm, 76))
718 return -EINVAL;
719
720 mutex_lock(&kvm->lock);
721 switch (attr->attr) {
722 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
723 get_random_bytes(
724 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
725 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
726 kvm->arch.crypto.aes_kw = 1;
727 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
728 break;
729 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
730 get_random_bytes(
731 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
732 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
733 kvm->arch.crypto.dea_kw = 1;
734 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
735 break;
736 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
737 kvm->arch.crypto.aes_kw = 0;
738 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
739 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
740 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
741 break;
742 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
743 kvm->arch.crypto.dea_kw = 0;
744 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
745 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
746 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
747 break;
748 default:
749 mutex_unlock(&kvm->lock);
750 return -ENXIO;
751 }
752
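	/* update each vcpu's crypto setup and kick it out of SIE so the new settings take effect */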
753 kvm_for_each_vcpu(i, vcpu, kvm) {
754 kvm_s390_vcpu_crypto_setup(vcpu);
755 exit_sie(vcpu);
756 }
757 mutex_unlock(&kvm->lock);
758 return 0;
759 }
760
761 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
762 {
763 int cx;
764 struct kvm_vcpu *vcpu;
765
766 kvm_for_each_vcpu(cx, vcpu, kvm)
767 kvm_s390_sync_request(req, vcpu);
768 }
769
770 /*
771 * Must be called with kvm->srcu held to avoid races on memslots, and with
772 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
773 */
774 static int kvm_s390_vm_start_migration(struct kvm *kvm)
775 {
776 struct kvm_s390_migration_state *mgs;
777 struct kvm_memory_slot *ms;
778 /* should be the only one */
779 struct kvm_memslots *slots;
780 unsigned long ram_pages;
781 int slotnr;
782
783 /* migration mode already enabled */
784 if (kvm->arch.migration_state)
785 return 0;
786
787 slots = kvm_memslots(kvm);
788 if (!slots || !slots->used_slots)
789 return -EINVAL;
790
791 mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
792 if (!mgs)
793 return -ENOMEM;
794 kvm->arch.migration_state = mgs;
795
796 if (kvm->arch.use_cmma) {
797 /*
798 * Get the first slot. They are reverse sorted by base_gfn, so
799 * the first slot is also the one at the end of the address
800 * space. We have verified above that at least one slot is
801 * present.
802 */
803 ms = slots->memslots;
804 /* round up so we only use full longs */
805 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
806 /* allocate enough bytes to store all the bits */
807 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
808 if (!mgs->pgste_bitmap) {
809 kfree(mgs);
810 kvm->arch.migration_state = NULL;
811 return -ENOMEM;
812 }
813
814 mgs->bitmap_size = ram_pages;
815 atomic64_set(&mgs->dirty_pages, ram_pages);
816 /* mark all the pages in active slots as dirty */
817 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
818 ms = slots->memslots + slotnr;
819 bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
820 }
821
822 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
823 }
824 return 0;
825 }
826
827 /*
828 * Must be called with kvm->lock to avoid races with ourselves and
829 * kvm_s390_vm_start_migration.
830 */
831 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
832 {
833 struct kvm_s390_migration_state *mgs;
834
835 /* migration mode already disabled */
836 if (!kvm->arch.migration_state)
837 return 0;
838 mgs = kvm->arch.migration_state;
839 kvm->arch.migration_state = NULL;
840
841 if (kvm->arch.use_cmma) {
842 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
843 vfree(mgs->pgste_bitmap);
844 }
845 kfree(mgs);
846 return 0;
847 }
848
849 static int kvm_s390_vm_set_migration(struct kvm *kvm,
850 struct kvm_device_attr *attr)
851 {
852 int idx, res = -ENXIO;
853
854 mutex_lock(&kvm->lock);
855 switch (attr->attr) {
856 case KVM_S390_VM_MIGRATION_START:
857 idx = srcu_read_lock(&kvm->srcu);
858 res = kvm_s390_vm_start_migration(kvm);
859 srcu_read_unlock(&kvm->srcu, idx);
860 break;
861 case KVM_S390_VM_MIGRATION_STOP:
862 res = kvm_s390_vm_stop_migration(kvm);
863 break;
864 default:
865 break;
866 }
867 mutex_unlock(&kvm->lock);
868
869 return res;
870 }
871
872 static int kvm_s390_vm_get_migration(struct kvm *kvm,
873 struct kvm_device_attr *attr)
874 {
875 u64 mig = (kvm->arch.migration_state != NULL);
876
877 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
878 return -ENXIO;
879
880 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
881 return -EFAULT;
882 return 0;
883 }
884
885 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
886 {
887 struct kvm_s390_vm_tod_clock gtod;
888
889 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
890 return -EFAULT;
891
892 if (test_kvm_facility(kvm, 139))
893 kvm_s390_set_tod_clock_ext(kvm, &gtod);
894 else if (gtod.epoch_idx == 0)
895 kvm_s390_set_tod_clock(kvm, gtod.tod);
896 else
897 return -EINVAL;
898
899 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
900 gtod.epoch_idx, gtod.tod);
901
902 return 0;
903 }
904
905 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
906 {
907 u8 gtod_high;
908
909 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
910 sizeof(gtod_high)))
911 return -EFAULT;
912
913 if (gtod_high != 0)
914 return -EINVAL;
915 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
916
917 return 0;
918 }
919
920 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
921 {
922 u64 gtod;
923
924 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
925 return -EFAULT;
926
927 kvm_s390_set_tod_clock(kvm, gtod);
928 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
929 return 0;
930 }
931
932 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
933 {
934 int ret;
935
936 if (attr->flags)
937 return -EINVAL;
938
939 switch (attr->attr) {
940 case KVM_S390_VM_TOD_EXT:
941 ret = kvm_s390_set_tod_ext(kvm, attr);
942 break;
943 case KVM_S390_VM_TOD_HIGH:
944 ret = kvm_s390_set_tod_high(kvm, attr);
945 break;
946 case KVM_S390_VM_TOD_LOW:
947 ret = kvm_s390_set_tod_low(kvm, attr);
948 break;
949 default:
950 ret = -ENXIO;
951 break;
952 }
953 return ret;
954 }
955
956 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
957 struct kvm_s390_vm_tod_clock *gtod)
958 {
959 struct kvm_s390_tod_clock_ext htod;
960
961 preempt_disable();
962
963 get_tod_clock_ext((char *)&htod);
964
965 gtod->tod = htod.tod + kvm->arch.epoch;
966 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
967
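	/* if adding the epoch wrapped the 64-bit TOD value, carry into the epoch index */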
968 if (gtod->tod < htod.tod)
969 gtod->epoch_idx += 1;
970
971 preempt_enable();
972 }
973
974 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
975 {
976 struct kvm_s390_vm_tod_clock gtod;
977
978 memset(&gtod, 0, sizeof(gtod));
979
980 if (test_kvm_facility(kvm, 139))
981 kvm_s390_get_tod_clock_ext(kvm, &gtod);
982 else
983 gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
984
985 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
986 return -EFAULT;
987
988 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
989 gtod.epoch_idx, gtod.tod);
990 return 0;
991 }
992
993 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
994 {
995 u8 gtod_high = 0;
996
997 if (copy_to_user((void __user *)attr->addr, &gtod_high,
998 sizeof(gtod_high)))
999 return -EFAULT;
1000 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1001
1002 return 0;
1003 }
1004
1005 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1006 {
1007 u64 gtod;
1008
1009 gtod = kvm_s390_get_tod_clock_fast(kvm);
1010 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1011 return -EFAULT;
1012 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1013
1014 return 0;
1015 }
1016
1017 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1018 {
1019 int ret;
1020
1021 if (attr->flags)
1022 return -EINVAL;
1023
1024 switch (attr->attr) {
1025 case KVM_S390_VM_TOD_EXT:
1026 ret = kvm_s390_get_tod_ext(kvm, attr);
1027 break;
1028 case KVM_S390_VM_TOD_HIGH:
1029 ret = kvm_s390_get_tod_high(kvm, attr);
1030 break;
1031 case KVM_S390_VM_TOD_LOW:
1032 ret = kvm_s390_get_tod_low(kvm, attr);
1033 break;
1034 default:
1035 ret = -ENXIO;
1036 break;
1037 }
1038 return ret;
1039 }
1040
1041 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1042 {
1043 struct kvm_s390_vm_cpu_processor *proc;
1044 u16 lowest_ibc, unblocked_ibc;
1045 int ret = 0;
1046
1047 mutex_lock(&kvm->lock);
1048 if (kvm->created_vcpus) {
1049 ret = -EBUSY;
1050 goto out;
1051 }
1052 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1053 if (!proc) {
1054 ret = -ENOMEM;
1055 goto out;
1056 }
1057 if (!copy_from_user(proc, (void __user *)attr->addr,
1058 sizeof(*proc))) {
1059 kvm->arch.model.cpuid = proc->cpuid;
1060 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1061 unblocked_ibc = sclp.ibc & 0xfff;
1062 if (lowest_ibc && proc->ibc) {
1063 if (proc->ibc > unblocked_ibc)
1064 kvm->arch.model.ibc = unblocked_ibc;
1065 else if (proc->ibc < lowest_ibc)
1066 kvm->arch.model.ibc = lowest_ibc;
1067 else
1068 kvm->arch.model.ibc = proc->ibc;
1069 }
1070 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1071 S390_ARCH_FAC_LIST_SIZE_BYTE);
1072 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1073 kvm->arch.model.ibc,
1074 kvm->arch.model.cpuid);
1075 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1076 kvm->arch.model.fac_list[0],
1077 kvm->arch.model.fac_list[1],
1078 kvm->arch.model.fac_list[2]);
1079 } else
1080 ret = -EFAULT;
1081 kfree(proc);
1082 out:
1083 mutex_unlock(&kvm->lock);
1084 return ret;
1085 }
1086
1087 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1088 struct kvm_device_attr *attr)
1089 {
1090 struct kvm_s390_vm_cpu_feat data;
1091 int ret = -EBUSY;
1092
1093 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1094 return -EFAULT;
1095 if (!bitmap_subset((unsigned long *) data.feat,
1096 kvm_s390_available_cpu_feat,
1097 KVM_S390_VM_CPU_FEAT_NR_BITS))
1098 return -EINVAL;
1099
1100 mutex_lock(&kvm->lock);
1101 if (!atomic_read(&kvm->online_vcpus)) {
1102 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1103 KVM_S390_VM_CPU_FEAT_NR_BITS);
1104 ret = 0;
1105 }
1106 mutex_unlock(&kvm->lock);
1107 return ret;
1108 }
1109
1110 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1111 struct kvm_device_attr *attr)
1112 {
1113 /*
1114 * Once supported by kernel + hw, we have to store the subfunctions
1115 * in kvm->arch and remember that user space configured them.
1116 */
1117 return -ENXIO;
1118 }
1119
1120 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1121 {
1122 int ret = -ENXIO;
1123
1124 switch (attr->attr) {
1125 case KVM_S390_VM_CPU_PROCESSOR:
1126 ret = kvm_s390_set_processor(kvm, attr);
1127 break;
1128 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1129 ret = kvm_s390_set_processor_feat(kvm, attr);
1130 break;
1131 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1132 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1133 break;
1134 }
1135 return ret;
1136 }
1137
1138 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1139 {
1140 struct kvm_s390_vm_cpu_processor *proc;
1141 int ret = 0;
1142
1143 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1144 if (!proc) {
1145 ret = -ENOMEM;
1146 goto out;
1147 }
1148 proc->cpuid = kvm->arch.model.cpuid;
1149 proc->ibc = kvm->arch.model.ibc;
1150 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1151 S390_ARCH_FAC_LIST_SIZE_BYTE);
1152 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1153 kvm->arch.model.ibc,
1154 kvm->arch.model.cpuid);
1155 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1156 kvm->arch.model.fac_list[0],
1157 kvm->arch.model.fac_list[1],
1158 kvm->arch.model.fac_list[2]);
1159 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1160 ret = -EFAULT;
1161 kfree(proc);
1162 out:
1163 return ret;
1164 }
1165
1166 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1167 {
1168 struct kvm_s390_vm_cpu_machine *mach;
1169 int ret = 0;
1170
1171 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1172 if (!mach) {
1173 ret = -ENOMEM;
1174 goto out;
1175 }
1176 get_cpu_id((struct cpuid *) &mach->cpuid);
1177 mach->ibc = sclp.ibc;
1178 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1179 S390_ARCH_FAC_LIST_SIZE_BYTE);
1180 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1181 sizeof(S390_lowcore.stfle_fac_list));
1182 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1183 kvm->arch.model.ibc,
1184 kvm->arch.model.cpuid);
1185 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1186 mach->fac_mask[0],
1187 mach->fac_mask[1],
1188 mach->fac_mask[2]);
1189 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1190 mach->fac_list[0],
1191 mach->fac_list[1],
1192 mach->fac_list[2]);
1193 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1194 ret = -EFAULT;
1195 kfree(mach);
1196 out:
1197 return ret;
1198 }
1199
1200 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1201 struct kvm_device_attr *attr)
1202 {
1203 struct kvm_s390_vm_cpu_feat data;
1204
1205 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1206 KVM_S390_VM_CPU_FEAT_NR_BITS);
1207 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1208 return -EFAULT;
1209 return 0;
1210 }
1211
1212 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1213 struct kvm_device_attr *attr)
1214 {
1215 struct kvm_s390_vm_cpu_feat data;
1216
1217 bitmap_copy((unsigned long *) data.feat,
1218 kvm_s390_available_cpu_feat,
1219 KVM_S390_VM_CPU_FEAT_NR_BITS);
1220 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1221 return -EFAULT;
1222 return 0;
1223 }
1224
1225 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1226 struct kvm_device_attr *attr)
1227 {
1228 /*
1229 * Once we can actually configure subfunctions (kernel + hw support),
1230 * we have to check if they were already set by user space, if so copy
1231 * them from kvm->arch.
1232 */
1233 return -ENXIO;
1234 }
1235
1236 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1237 struct kvm_device_attr *attr)
1238 {
1239 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1240 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1241 return -EFAULT;
1242 return 0;
1243 }
1244 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1245 {
1246 int ret = -ENXIO;
1247
1248 switch (attr->attr) {
1249 case KVM_S390_VM_CPU_PROCESSOR:
1250 ret = kvm_s390_get_processor(kvm, attr);
1251 break;
1252 case KVM_S390_VM_CPU_MACHINE:
1253 ret = kvm_s390_get_machine(kvm, attr);
1254 break;
1255 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1256 ret = kvm_s390_get_processor_feat(kvm, attr);
1257 break;
1258 case KVM_S390_VM_CPU_MACHINE_FEAT:
1259 ret = kvm_s390_get_machine_feat(kvm, attr);
1260 break;
1261 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1262 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1263 break;
1264 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1265 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1266 break;
1267 }
1268 return ret;
1269 }
1270
1271 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1272 {
1273 int ret;
1274
1275 switch (attr->group) {
1276 case KVM_S390_VM_MEM_CTRL:
1277 ret = kvm_s390_set_mem_control(kvm, attr);
1278 break;
1279 case KVM_S390_VM_TOD:
1280 ret = kvm_s390_set_tod(kvm, attr);
1281 break;
1282 case KVM_S390_VM_CPU_MODEL:
1283 ret = kvm_s390_set_cpu_model(kvm, attr);
1284 break;
1285 case KVM_S390_VM_CRYPTO:
1286 ret = kvm_s390_vm_set_crypto(kvm, attr);
1287 break;
1288 case KVM_S390_VM_MIGRATION:
1289 ret = kvm_s390_vm_set_migration(kvm, attr);
1290 break;
1291 default:
1292 ret = -ENXIO;
1293 break;
1294 }
1295
1296 return ret;
1297 }
1298
1299 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1300 {
1301 int ret;
1302
1303 switch (attr->group) {
1304 case KVM_S390_VM_MEM_CTRL:
1305 ret = kvm_s390_get_mem_control(kvm, attr);
1306 break;
1307 case KVM_S390_VM_TOD:
1308 ret = kvm_s390_get_tod(kvm, attr);
1309 break;
1310 case KVM_S390_VM_CPU_MODEL:
1311 ret = kvm_s390_get_cpu_model(kvm, attr);
1312 break;
1313 case KVM_S390_VM_MIGRATION:
1314 ret = kvm_s390_vm_get_migration(kvm, attr);
1315 break;
1316 default:
1317 ret = -ENXIO;
1318 break;
1319 }
1320
1321 return ret;
1322 }
1323
1324 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1325 {
1326 int ret;
1327
1328 switch (attr->group) {
1329 case KVM_S390_VM_MEM_CTRL:
1330 switch (attr->attr) {
1331 case KVM_S390_VM_MEM_ENABLE_CMMA:
1332 case KVM_S390_VM_MEM_CLR_CMMA:
1333 ret = sclp.has_cmma ? 0 : -ENXIO;
1334 break;
1335 case KVM_S390_VM_MEM_LIMIT_SIZE:
1336 ret = 0;
1337 break;
1338 default:
1339 ret = -ENXIO;
1340 break;
1341 }
1342 break;
1343 case KVM_S390_VM_TOD:
1344 switch (attr->attr) {
1345 case KVM_S390_VM_TOD_LOW:
1346 case KVM_S390_VM_TOD_HIGH:
1347 ret = 0;
1348 break;
1349 default:
1350 ret = -ENXIO;
1351 break;
1352 }
1353 break;
1354 case KVM_S390_VM_CPU_MODEL:
1355 switch (attr->attr) {
1356 case KVM_S390_VM_CPU_PROCESSOR:
1357 case KVM_S390_VM_CPU_MACHINE:
1358 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1359 case KVM_S390_VM_CPU_MACHINE_FEAT:
1360 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1361 ret = 0;
1362 break;
1363 /* configuring subfunctions is not supported yet */
1364 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1365 default:
1366 ret = -ENXIO;
1367 break;
1368 }
1369 break;
1370 case KVM_S390_VM_CRYPTO:
1371 switch (attr->attr) {
1372 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1373 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1374 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1375 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1376 ret = 0;
1377 break;
1378 default:
1379 ret = -ENXIO;
1380 break;
1381 }
1382 break;
1383 case KVM_S390_VM_MIGRATION:
1384 ret = 0;
1385 break;
1386 default:
1387 ret = -ENXIO;
1388 break;
1389 }
1390
1391 return ret;
1392 }
1393
1394 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1395 {
1396 uint8_t *keys;
1397 uint64_t hva;
1398 int srcu_idx, i, r = 0;
1399
1400 if (args->flags != 0)
1401 return -EINVAL;
1402
1403 /* Is this guest using storage keys? */
1404 if (!mm_use_skey(current->mm))
1405 return KVM_S390_GET_SKEYS_NONE;
1406
1407 /* Enforce sane limit on memory allocation */
1408 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1409 return -EINVAL;
1410
1411 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1412 if (!keys)
1413 return -ENOMEM;
1414
1415 down_read(&current->mm->mmap_sem);
1416 srcu_idx = srcu_read_lock(&kvm->srcu);
1417 for (i = 0; i < args->count; i++) {
1418 hva = gfn_to_hva(kvm, args->start_gfn + i);
1419 if (kvm_is_error_hva(hva)) {
1420 r = -EFAULT;
1421 break;
1422 }
1423
1424 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1425 if (r)
1426 break;
1427 }
1428 srcu_read_unlock(&kvm->srcu, srcu_idx);
1429 up_read(&current->mm->mmap_sem);
1430
1431 if (!r) {
1432 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1433 sizeof(uint8_t) * args->count);
1434 if (r)
1435 r = -EFAULT;
1436 }
1437
1438 kvfree(keys);
1439 return r;
1440 }
1441
1442 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1443 {
1444 uint8_t *keys;
1445 uint64_t hva;
1446 int srcu_idx, i, r = 0;
1447
1448 if (args->flags != 0)
1449 return -EINVAL;
1450
1451 /* Enforce sane limit on memory allocation */
1452 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1453 return -EINVAL;
1454
1455 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1456 if (!keys)
1457 return -ENOMEM;
1458
1459 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1460 sizeof(uint8_t) * args->count);
1461 if (r) {
1462 r = -EFAULT;
1463 goto out;
1464 }
1465
1466 /* Enable storage key handling for the guest */
1467 r = s390_enable_skey();
1468 if (r)
1469 goto out;
1470
1471 down_read(&current->mm->mmap_sem);
1472 srcu_idx = srcu_read_lock(&kvm->srcu);
1473 for (i = 0; i < args->count; i++) {
1474 hva = gfn_to_hva(kvm, args->start_gfn + i);
1475 if (kvm_is_error_hva(hva)) {
1476 r = -EFAULT;
1477 break;
1478 }
1479
1480 /* Lowest order bit is reserved */
1481 if (keys[i] & 0x01) {
1482 r = -EINVAL;
1483 break;
1484 }
1485
1486 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1487 if (r)
1488 break;
1489 }
1490 srcu_read_unlock(&kvm->srcu, srcu_idx);
1491 up_read(&current->mm->mmap_sem);
1492 out:
1493 kvfree(keys);
1494 return r;
1495 }
1496
1497 /*
1498 * Base address and length must be sent at the start of each block, therefore
1499 * it's cheaper to send some clean data, as long as it's less than the size of
1500 * two longs.
1501 */
1502 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1503 /* for consistency */
1504 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1505
1506 /*
1507 * This function searches for the next page with dirty CMMA attributes, and
1508 * saves the attributes in the buffer up to either the end of the buffer or
1509 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1510 * no trailing clean bytes are saved.
1511 * In case no dirty bits were found, or if CMMA was not enabled or used, the
1512 * output buffer will indicate 0 as length.
1513 */
1514 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1515 struct kvm_s390_cmma_log *args)
1516 {
1517 struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1518 unsigned long bufsize, hva, pgstev, i, next, cur;
1519 int srcu_idx, peek, r = 0, rr;
1520 u8 *res;
1521
1522 cur = args->start_gfn;
1523 i = next = pgstev = 0;
1524
1525 if (unlikely(!kvm->arch.use_cmma))
1526 return -ENXIO;
1527 /* Invalid/unsupported flags were specified */
1528 if (args->flags & ~KVM_S390_CMMA_PEEK)
1529 return -EINVAL;
1530 /* Migration mode query, and we are not doing a migration */
1531 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1532 if (!peek && !s)
1533 return -EINVAL;
1534 /* CMMA is disabled or was not used, or the buffer has length zero */
1535 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1536 if (!bufsize || !kvm->mm->context.use_cmma) {
1537 memset(args, 0, sizeof(*args));
1538 return 0;
1539 }
1540
1541 if (!peek) {
1542 /* We are not peeking, and there are no dirty pages */
1543 if (!atomic64_read(&s->dirty_pages)) {
1544 memset(args, 0, sizeof(*args));
1545 return 0;
1546 }
1547 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1548 args->start_gfn);
1549 if (cur >= s->bitmap_size) /* nothing found, loop back */
1550 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1551 if (cur >= s->bitmap_size) { /* again! (very unlikely) */
1552 memset(args, 0, sizeof(*args));
1553 return 0;
1554 }
1555 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1556 }
1557
1558 res = vmalloc(bufsize);
1559 if (!res)
1560 return -ENOMEM;
1561
1562 args->start_gfn = cur;
1563
1564 down_read(&kvm->mm->mmap_sem);
1565 srcu_idx = srcu_read_lock(&kvm->srcu);
1566 while (i < bufsize) {
1567 hva = gfn_to_hva(kvm, cur);
1568 if (kvm_is_error_hva(hva)) {
1569 r = -EFAULT;
1570 break;
1571 }
1572 /* decrement only if we actually flipped the bit to 0 */
1573 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1574 atomic64_dec(&s->dirty_pages);
1575 r = get_pgste(kvm->mm, hva, &pgstev);
1576 if (r < 0)
1577 pgstev = 0;
1578 /* save the value */
1579 res[i++] = (pgstev >> 24) & 0x43;
1580 /*
1581 * if the next bit is too far away, stop.
1582 * if we reached the previous "next", find the next one
1583 */
1584 if (!peek) {
1585 if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1586 break;
1587 if (cur == next)
1588 next = find_next_bit(s->pgste_bitmap,
1589 s->bitmap_size, cur + 1);
1590 /* reached the end of the bitmap or of the buffer, stop */
1591 if ((next >= s->bitmap_size) ||
1592 (next >= args->start_gfn + bufsize))
1593 break;
1594 }
1595 cur++;
1596 }
1597 srcu_read_unlock(&kvm->srcu, srcu_idx);
1598 up_read(&kvm->mm->mmap_sem);
1599 args->count = i;
1600 args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1601
1602 rr = copy_to_user((void __user *)args->values, res, args->count);
1603 if (rr)
1604 r = -EFAULT;
1605
1606 vfree(res);
1607 return r;
1608 }
1609
1610 /*
1611 * This function sets the CMMA attributes for the given pages. If the input
1612 * buffer has zero length, no action is taken, otherwise the attributes are
1613 * set and the mm->context.use_cmma flag is set.
1614 */
1615 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1616 const struct kvm_s390_cmma_log *args)
1617 {
1618 unsigned long hva, mask, pgstev, i;
1619 uint8_t *bits;
1620 int srcu_idx, r = 0;
1621
1622 mask = args->mask;
1623
1624 if (!kvm->arch.use_cmma)
1625 return -ENXIO;
1626 /* invalid/unsupported flags */
1627 if (args->flags != 0)
1628 return -EINVAL;
1629 /* Enforce sane limit on memory allocation */
1630 if (args->count > KVM_S390_CMMA_SIZE_MAX)
1631 return -EINVAL;
1632 /* Nothing to do */
1633 if (args->count == 0)
1634 return 0;
1635
1636 bits = vmalloc(sizeof(*bits) * args->count);
1637 if (!bits)
1638 return -ENOMEM;
1639
1640 r = copy_from_user(bits, (void __user *)args->values, args->count);
1641 if (r) {
1642 r = -EFAULT;
1643 goto out;
1644 }
1645
1646 down_read(&kvm->mm->mmap_sem);
1647 srcu_idx = srcu_read_lock(&kvm->srcu);
1648 for (i = 0; i < args->count; i++) {
1649 hva = gfn_to_hva(kvm, args->start_gfn + i);
1650 if (kvm_is_error_hva(hva)) {
1651 r = -EFAULT;
1652 break;
1653 }
1654
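		/*
		 * shift the value up into the guest-storage-state bit positions
		 * of the PGSTE; only the usage-state and NODAT bits may be changed
		 */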
1655 pgstev = bits[i];
1656 pgstev = pgstev << 24;
1657 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1658 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1659 }
1660 srcu_read_unlock(&kvm->srcu, srcu_idx);
1661 up_read(&kvm->mm->mmap_sem);
1662
1663 if (!kvm->mm->context.use_cmma) {
1664 down_write(&kvm->mm->mmap_sem);
1665 kvm->mm->context.use_cmma = 1;
1666 up_write(&kvm->mm->mmap_sem);
1667 }
1668 out:
1669 vfree(bits);
1670 return r;
1671 }
1672
1673 long kvm_arch_vm_ioctl(struct file *filp,
1674 unsigned int ioctl, unsigned long arg)
1675 {
1676 struct kvm *kvm = filp->private_data;
1677 void __user *argp = (void __user *)arg;
1678 struct kvm_device_attr attr;
1679 int r;
1680
1681 switch (ioctl) {
1682 case KVM_S390_INTERRUPT: {
1683 struct kvm_s390_interrupt s390int;
1684
1685 r = -EFAULT;
1686 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1687 break;
1688 r = kvm_s390_inject_vm(kvm, &s390int);
1689 break;
1690 }
1691 case KVM_ENABLE_CAP: {
1692 struct kvm_enable_cap cap;
1693 r = -EFAULT;
1694 if (copy_from_user(&cap, argp, sizeof(cap)))
1695 break;
1696 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1697 break;
1698 }
1699 case KVM_CREATE_IRQCHIP: {
1700 struct kvm_irq_routing_entry routing;
1701
1702 r = -EINVAL;
1703 if (kvm->arch.use_irqchip) {
1704 /* Set up dummy routing. */
1705 memset(&routing, 0, sizeof(routing));
1706 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1707 }
1708 break;
1709 }
1710 case KVM_SET_DEVICE_ATTR: {
1711 r = -EFAULT;
1712 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1713 break;
1714 r = kvm_s390_vm_set_attr(kvm, &attr);
1715 break;
1716 }
1717 case KVM_GET_DEVICE_ATTR: {
1718 r = -EFAULT;
1719 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1720 break;
1721 r = kvm_s390_vm_get_attr(kvm, &attr);
1722 break;
1723 }
1724 case KVM_HAS_DEVICE_ATTR: {
1725 r = -EFAULT;
1726 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1727 break;
1728 r = kvm_s390_vm_has_attr(kvm, &attr);
1729 break;
1730 }
1731 case KVM_S390_GET_SKEYS: {
1732 struct kvm_s390_skeys args;
1733
1734 r = -EFAULT;
1735 if (copy_from_user(&args, argp,
1736 sizeof(struct kvm_s390_skeys)))
1737 break;
1738 r = kvm_s390_get_skeys(kvm, &args);
1739 break;
1740 }
1741 case KVM_S390_SET_SKEYS: {
1742 struct kvm_s390_skeys args;
1743
1744 r = -EFAULT;
1745 if (copy_from_user(&args, argp,
1746 sizeof(struct kvm_s390_skeys)))
1747 break;
1748 r = kvm_s390_set_skeys(kvm, &args);
1749 break;
1750 }
1751 case KVM_S390_GET_CMMA_BITS: {
1752 struct kvm_s390_cmma_log args;
1753
1754 r = -EFAULT;
1755 if (copy_from_user(&args, argp, sizeof(args)))
1756 break;
1757 r = kvm_s390_get_cmma_bits(kvm, &args);
1758 if (!r) {
1759 r = copy_to_user(argp, &args, sizeof(args));
1760 if (r)
1761 r = -EFAULT;
1762 }
1763 break;
1764 }
1765 case KVM_S390_SET_CMMA_BITS: {
1766 struct kvm_s390_cmma_log args;
1767
1768 r = -EFAULT;
1769 if (copy_from_user(&args, argp, sizeof(args)))
1770 break;
1771 r = kvm_s390_set_cmma_bits(kvm, &args);
1772 break;
1773 }
1774 default:
1775 r = -ENOTTY;
1776 }
1777
1778 return r;
1779 }
1780
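/*
 * Query the AP (adjunct processor) configuration via PQAP(QCI) into the
 * 128-byte config block; returns the condition code of the instruction.
 */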
1781 static int kvm_s390_query_ap_config(u8 *config)
1782 {
1783 u32 fcn_code = 0x04000000UL;
1784 u32 cc = 0;
1785
1786 memset(config, 0, 128);
1787 asm volatile(
1788 "lgr 0,%1\n"
1789 "lgr 2,%2\n"
1790 ".long 0xb2af0000\n" /* PQAP(QCI) */
1791 "0: ipm %0\n"
1792 "srl %0,28\n"
1793 "1:\n"
1794 EX_TABLE(0b, 1b)
1795 : "+r" (cc)
1796 : "r" (fcn_code), "r" (config)
1797 : "cc", "0", "2", "memory"
1798 );
1799
1800 return cc;
1801 }
1802
1803 static int kvm_s390_apxa_installed(void)
1804 {
1805 u8 config[128];
1806 int cc;
1807
1808 if (test_facility(12)) {
1809 cc = kvm_s390_query_ap_config(config);
1810
1811 if (cc)
1812 pr_err("PQAP(QCI) failed with cc=%d", cc);
1813 else
1814 return config[0] & 0x40;
1815 }
1816
1817 return 0;
1818 }
1819
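/* the CRYCB designation holds the control block address, with the format encoded in the low-order bits */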
1820 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1821 {
1822 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1823
1824 if (kvm_s390_apxa_installed())
1825 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1826 else
1827 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1828 }
1829
1830 static u64 kvm_s390_get_initial_cpuid(void)
1831 {
1832 struct cpuid cpuid;
1833
1834 get_cpu_id(&cpuid);
1835 cpuid.version = 0xff;
1836 return *((u64 *) &cpuid);
1837 }
1838
1839 static void kvm_s390_crypto_init(struct kvm *kvm)
1840 {
1841 if (!test_kvm_facility(kvm, 76))
1842 return;
1843
1844 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1845 kvm_s390_set_crycb_format(kvm);
1846
1847 /* Enable AES/DEA protected key functions by default */
1848 kvm->arch.crypto.aes_kw = 1;
1849 kvm->arch.crypto.dea_kw = 1;
1850 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1851 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1852 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1853 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1854 }
1855
1856 static void sca_dispose(struct kvm *kvm)
1857 {
1858 if (kvm->arch.use_esca)
1859 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1860 else
1861 free_page((unsigned long)(kvm->arch.sca));
1862 kvm->arch.sca = NULL;
1863 }
1864
1865 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1866 {
1867 gfp_t alloc_flags = GFP_KERNEL;
1868 int i, rc;
1869 char debug_name[16];
1870 static unsigned long sca_offset;
1871
1872 rc = -EINVAL;
1873 #ifdef CONFIG_KVM_S390_UCONTROL
1874 if (type & ~KVM_VM_S390_UCONTROL)
1875 goto out_err;
1876 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1877 goto out_err;
1878 #else
1879 if (type)
1880 goto out_err;
1881 #endif
1882
1883 rc = s390_enable_sie();
1884 if (rc)
1885 goto out_err;
1886
1887 rc = -ENOMEM;
1888
1889 kvm->arch.use_esca = 0; /* start with basic SCA */
1890 if (!sclp.has_64bscao)
1891 alloc_flags |= GFP_DMA;
1892 rwlock_init(&kvm->arch.sca_lock);
1893 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1894 if (!kvm->arch.sca)
1895 goto out_err;
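	/*
	 * stagger basic SCAs within their page in steps of 16 bytes so that
	 * the SCAs of different VMs do not all start at offset 0
	 */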
1896 spin_lock(&kvm_lock);
1897 sca_offset += 16;
1898 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1899 sca_offset = 0;
1900 kvm->arch.sca = (struct bsca_block *)
1901 ((char *) kvm->arch.sca + sca_offset);
1902 spin_unlock(&kvm_lock);
1903
1904 sprintf(debug_name, "kvm-%u", current->pid);
1905
1906 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1907 if (!kvm->arch.dbf)
1908 goto out_err;
1909
1910 kvm->arch.sie_page2 =
1911 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1912 if (!kvm->arch.sie_page2)
1913 goto out_err;
1914
1915 /* Populate the facility mask initially. */
1916 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1917 sizeof(S390_lowcore.stfle_fac_list));
1918 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1919 if (i < kvm_s390_fac_list_mask_size())
1920 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1921 else
1922 kvm->arch.model.fac_mask[i] = 0UL;
1923 }
1924
1925 /* Populate the facility list initially. */
1926 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1927 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1928 S390_ARCH_FAC_LIST_SIZE_BYTE);
1929
1930 /* we are always in czam mode - even on pre z14 machines */
1931 set_kvm_facility(kvm->arch.model.fac_mask, 138);
1932 set_kvm_facility(kvm->arch.model.fac_list, 138);
1933 /* we emulate STHYI in kvm */
1934 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1935 set_kvm_facility(kvm->arch.model.fac_list, 74);
1936 if (MACHINE_HAS_TLB_GUEST) {
1937 set_kvm_facility(kvm->arch.model.fac_mask, 147);
1938 set_kvm_facility(kvm->arch.model.fac_list, 147);
1939 }
1940
1941 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1942 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1943
1944 kvm_s390_crypto_init(kvm);
1945
1946 mutex_init(&kvm->arch.float_int.ais_lock);
1947 kvm->arch.float_int.simm = 0;
1948 kvm->arch.float_int.nimm = 0;
1949 spin_lock_init(&kvm->arch.float_int.lock);
1950 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1951 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1952 init_waitqueue_head(&kvm->arch.ipte_wq);
1953 mutex_init(&kvm->arch.ipte_mutex);
1954
1955 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1956 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1957
1958 if (type & KVM_VM_S390_UCONTROL) {
1959 kvm->arch.gmap = NULL;
1960 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1961 } else {
1962 if (sclp.hamax == U64_MAX)
1963 kvm->arch.mem_limit = TASK_SIZE_MAX;
1964 else
1965 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1966 sclp.hamax + 1);
1967 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1968 if (!kvm->arch.gmap)
1969 goto out_err;
1970 kvm->arch.gmap->private = kvm;
1971 kvm->arch.gmap->pfault_enabled = 0;
1972 }
1973
1974 kvm->arch.css_support = 0;
1975 kvm->arch.use_irqchip = 0;
1976 kvm->arch.epoch = 0;
1977
1978 spin_lock_init(&kvm->arch.start_stop_lock);
1979 kvm_s390_vsie_init(kvm);
1980 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1981
1982 return 0;
1983 out_err:
1984 free_page((unsigned long)kvm->arch.sie_page2);
1985 debug_unregister(kvm->arch.dbf);
1986 sca_dispose(kvm);
1987 KVM_EVENT(3, "creation of vm failed: %d", rc);
1988 return rc;
1989 }
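/*
 * Illustrative userspace sketch (not part of this file): creating a VM whose
 * type is then checked by kvm_arch_init_vm() above. Passing 0 creates a
 * regular VM; KVM_VM_S390_UCONTROL additionally requires CAP_SYS_ADMIN.
 * Assumes <linux/kvm.h>, <sys/ioctl.h>, <fcntl.h> and <unistd.h>.
 */
static int example_create_vm(int ucontrol)
{
        int sys_fd, vm_fd;

        sys_fd = open("/dev/kvm", O_RDWR);
        if (sys_fd < 0)
                return -1;
        /* the last ioctl argument is the machine type checked above */
        vm_fd = ioctl(sys_fd, KVM_CREATE_VM,
                      ucontrol ? KVM_VM_S390_UCONTROL : 0UL);
        close(sys_fd);
        return vm_fd;
}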
1990
1991 bool kvm_arch_has_vcpu_debugfs(void)
1992 {
1993 return false;
1994 }
1995
1996 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1997 {
1998 return 0;
1999 }
2000
2001 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2002 {
2003 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2004 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2005 kvm_s390_clear_local_irqs(vcpu);
2006 kvm_clear_async_pf_completion_queue(vcpu);
2007 if (!kvm_is_ucontrol(vcpu->kvm))
2008 sca_del_vcpu(vcpu);
2009
2010 if (kvm_is_ucontrol(vcpu->kvm))
2011 gmap_remove(vcpu->arch.gmap);
2012
2013 if (vcpu->kvm->arch.use_cmma)
2014 kvm_s390_vcpu_unsetup_cmma(vcpu);
2015 free_page((unsigned long)(vcpu->arch.sie_block));
2016
2017 kvm_vcpu_uninit(vcpu);
2018 kmem_cache_free(kvm_vcpu_cache, vcpu);
2019 }
2020
2021 static void kvm_free_vcpus(struct kvm *kvm)
2022 {
2023 unsigned int i;
2024 struct kvm_vcpu *vcpu;
2025
2026 kvm_for_each_vcpu(i, vcpu, kvm)
2027 kvm_arch_vcpu_destroy(vcpu);
2028
2029 mutex_lock(&kvm->lock);
2030 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2031 kvm->vcpus[i] = NULL;
2032
2033 atomic_set(&kvm->online_vcpus, 0);
2034 mutex_unlock(&kvm->lock);
2035 }
2036
2037 void kvm_arch_destroy_vm(struct kvm *kvm)
2038 {
2039 kvm_free_vcpus(kvm);
2040 sca_dispose(kvm);
2041 debug_unregister(kvm->arch.dbf);
2042 free_page((unsigned long)kvm->arch.sie_page2);
2043 if (!kvm_is_ucontrol(kvm))
2044 gmap_remove(kvm->arch.gmap);
2045 kvm_s390_destroy_adapters(kvm);
2046 kvm_s390_clear_float_irqs(kvm);
2047 kvm_s390_vsie_destroy(kvm);
2048 if (kvm->arch.migration_state) {
2049 vfree(kvm->arch.migration_state->pgste_bitmap);
2050 kfree(kvm->arch.migration_state);
2051 }
2052 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2053 }
2054
2055 /* Section: vcpu related */
2056 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2057 {
2058 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2059 if (!vcpu->arch.gmap)
2060 return -ENOMEM;
2061 vcpu->arch.gmap->private = vcpu->kvm;
2062
2063 return 0;
2064 }
2065
2066 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2067 {
2068 if (!kvm_s390_use_sca_entries())
2069 return;
2070 read_lock(&vcpu->kvm->arch.sca_lock);
2071 if (vcpu->kvm->arch.use_esca) {
2072 struct esca_block *sca = vcpu->kvm->arch.sca;
2073
2074 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2075 sca->cpu[vcpu->vcpu_id].sda = 0;
2076 } else {
2077 struct bsca_block *sca = vcpu->kvm->arch.sca;
2078
2079 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2080 sca->cpu[vcpu->vcpu_id].sda = 0;
2081 }
2082 read_unlock(&vcpu->kvm->arch.sca_lock);
2083 }
2084
2085 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2086 {
2087 if (!kvm_s390_use_sca_entries()) {
2088 struct bsca_block *sca = vcpu->kvm->arch.sca;
2089
2090 /* we still need the basic sca for the ipte control */
2091 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2092 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2093 }
2094 read_lock(&vcpu->kvm->arch.sca_lock);
2095 if (vcpu->kvm->arch.use_esca) {
2096 struct esca_block *sca = vcpu->kvm->arch.sca;
2097
2098 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2099 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2100 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2101 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2102 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2103 } else {
2104 struct bsca_block *sca = vcpu->kvm->arch.sca;
2105
2106 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2107 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2108 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2109 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2110 }
2111 read_unlock(&vcpu->kvm->arch.sca_lock);
2112 }
2113
2114 /* Basic SCA to Extended SCA data copy routines */
2115 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2116 {
2117 d->sda = s->sda;
2118 d->sigp_ctrl.c = s->sigp_ctrl.c;
2119 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2120 }
2121
2122 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2123 {
2124 int i;
2125
2126 d->ipte_control = s->ipte_control;
2127 d->mcn[0] = s->mcn;
2128 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2129 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2130 }
2131
2132 static int sca_switch_to_extended(struct kvm *kvm)
2133 {
2134 struct bsca_block *old_sca = kvm->arch.sca;
2135 struct esca_block *new_sca;
2136 struct kvm_vcpu *vcpu;
2137 unsigned int vcpu_idx;
2138 u32 scaol, scaoh;
2139
2140 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2141 if (!new_sca)
2142 return -ENOMEM;
2143
2144 scaoh = (u32)((u64)(new_sca) >> 32);
2145 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2146
2147 kvm_s390_vcpu_block_all(kvm);
2148 write_lock(&kvm->arch.sca_lock);
2149
2150 sca_copy_b_to_e(new_sca, old_sca);
2151
2152 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2153 vcpu->arch.sie_block->scaoh = scaoh;
2154 vcpu->arch.sie_block->scaol = scaol;
2155 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2156 }
2157 kvm->arch.sca = new_sca;
2158 kvm->arch.use_esca = 1;
2159
2160 write_unlock(&kvm->arch.sca_lock);
2161 kvm_s390_vcpu_unblock_all(kvm);
2162
2163 free_page((unsigned long)old_sca);
2164
2165 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2166 old_sca, kvm->arch.sca);
2167 return 0;
2168 }
2169
2170 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2171 {
2172 int rc;
2173
2174 if (!kvm_s390_use_sca_entries()) {
2175 if (id < KVM_MAX_VCPUS)
2176 return true;
2177 return false;
2178 }
2179 if (id < KVM_S390_BSCA_CPU_SLOTS)
2180 return true;
2181 if (!sclp.has_esca || !sclp.has_64bscao)
2182 return false;
2183
2184 mutex_lock(&kvm->lock);
2185 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2186 mutex_unlock(&kvm->lock);
2187
2188 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2189 }
2190
2191 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2192 {
2193 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2194 kvm_clear_async_pf_completion_queue(vcpu);
2195 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2196 KVM_SYNC_GPRS |
2197 KVM_SYNC_ACRS |
2198 KVM_SYNC_CRS |
2199 KVM_SYNC_ARCH0 |
2200 KVM_SYNC_PFAULT;
2201 kvm_s390_set_prefix(vcpu, 0);
2202 if (test_kvm_facility(vcpu->kvm, 64))
2203 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2204 if (test_kvm_facility(vcpu->kvm, 82))
2205 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2206 if (test_kvm_facility(vcpu->kvm, 133))
2207 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2208 /* fprs can be synchronized via vrs, even if the guest has no vx. With
2209 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2210 */
2211 if (MACHINE_HAS_VX)
2212 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2213 else
2214 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2215
2216 if (kvm_is_ucontrol(vcpu->kvm))
2217 return __kvm_ucontrol_vcpu_init(vcpu);
2218
2219 return 0;
2220 }
2221
2222 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2223 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2224 {
2225 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2226 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2227 vcpu->arch.cputm_start = get_tod_clock_fast();
2228 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2229 }
2230
2231 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2232 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2233 {
2234 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2235 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2236 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2237 vcpu->arch.cputm_start = 0;
2238 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2239 }
2240
2241 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2242 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2243 {
2244 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2245 vcpu->arch.cputm_enabled = true;
2246 __start_cpu_timer_accounting(vcpu);
2247 }
2248
2249 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2250 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2251 {
2252 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2253 __stop_cpu_timer_accounting(vcpu);
2254 vcpu->arch.cputm_enabled = false;
2255 }
2256
2257 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2258 {
2259 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2260 __enable_cpu_timer_accounting(vcpu);
2261 preempt_enable();
2262 }
2263
2264 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2265 {
2266 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2267 __disable_cpu_timer_accounting(vcpu);
2268 preempt_enable();
2269 }
2270
2271 /* set the cpu timer - may only be called from the VCPU thread itself */
2272 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2273 {
2274 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2275 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2276 if (vcpu->arch.cputm_enabled)
2277 vcpu->arch.cputm_start = get_tod_clock_fast();
2278 vcpu->arch.sie_block->cputm = cputm;
2279 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2280 preempt_enable();
2281 }
2282
2283 /* update and get the cpu timer - can also be called from other VCPU threads */
2284 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2285 {
2286 unsigned int seq;
2287 __u64 value;
2288
2289 if (unlikely(!vcpu->arch.cputm_enabled))
2290 return vcpu->arch.sie_block->cputm;
2291
2292 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2293 do {
2294 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2295 /*
2296 * If the writer would ever execute a read in the critical
2297 * section, e.g. in irq context, we have a deadlock.
2298 */
2299 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2300 value = vcpu->arch.sie_block->cputm;
2301 /* if cputm_start is 0, accounting is being started/stopped */
2302 if (likely(vcpu->arch.cputm_start))
2303 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2304 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2305 preempt_enable();
2306 return value;
2307 }
2308
2309 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2310 {
2311
2312 gmap_enable(vcpu->arch.enabled_gmap);
2313 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2314 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2315 __start_cpu_timer_accounting(vcpu);
2316 vcpu->cpu = cpu;
2317 }
2318
2319 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2320 {
2321 vcpu->cpu = -1;
2322 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2323 __stop_cpu_timer_accounting(vcpu);
2324 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2325 vcpu->arch.enabled_gmap = gmap_get_enabled();
2326 gmap_disable(vcpu->arch.enabled_gmap);
2327
2328 }
2329
2330 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2331 {
2332 /* this equals initial cpu reset in pop, but we don't switch to ESA */
2333 vcpu->arch.sie_block->gpsw.mask = 0UL;
2334 vcpu->arch.sie_block->gpsw.addr = 0UL;
2335 kvm_s390_set_prefix(vcpu, 0);
2336 kvm_s390_set_cpu_timer(vcpu, 0);
2337 vcpu->arch.sie_block->ckc = 0UL;
2338 vcpu->arch.sie_block->todpr = 0;
2339 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2340 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
2341 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2342 /* make sure the new fpc will be lazily loaded */
2343 save_fpu_regs();
2344 current->thread.fpu.fpc = 0;
2345 vcpu->arch.sie_block->gbea = 1;
2346 vcpu->arch.sie_block->pp = 0;
2347 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2348 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2349 kvm_clear_async_pf_completion_queue(vcpu);
2350 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2351 kvm_s390_vcpu_stop(vcpu);
2352 kvm_s390_clear_local_irqs(vcpu);
2353 }
2354
2355 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2356 {
2357 mutex_lock(&vcpu->kvm->lock);
2358 preempt_disable();
2359 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2360 preempt_enable();
2361 mutex_unlock(&vcpu->kvm->lock);
2362 if (!kvm_is_ucontrol(vcpu->kvm)) {
2363 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2364 sca_add_vcpu(vcpu);
2365 }
2366 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2367 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2368 /* make vcpu_load load the right gmap on the first trigger */
2369 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2370 }
2371
2372 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2373 {
2374 if (!test_kvm_facility(vcpu->kvm, 76))
2375 return;
2376
2377 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2378
2379 if (vcpu->kvm->arch.crypto.aes_kw)
2380 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2381 if (vcpu->kvm->arch.crypto.dea_kw)
2382 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2383
2384 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2385 }
2386
2387 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2388 {
2389 free_page(vcpu->arch.sie_block->cbrlo);
2390 vcpu->arch.sie_block->cbrlo = 0;
2391 }
2392
2393 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2394 {
2395 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2396 if (!vcpu->arch.sie_block->cbrlo)
2397 return -ENOMEM;
2398
2399 vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2400 return 0;
2401 }
2402
2403 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2404 {
2405 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2406
2407 vcpu->arch.sie_block->ibc = model->ibc;
2408 if (test_kvm_facility(vcpu->kvm, 7))
2409 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2410 }
2411
2412 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2413 {
2414 int rc = 0;
2415
2416 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2417 CPUSTAT_SM |
2418 CPUSTAT_STOPPED);
2419
2420 if (test_kvm_facility(vcpu->kvm, 78))
2421 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2422 else if (test_kvm_facility(vcpu->kvm, 8))
2423 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2424
2425 kvm_s390_vcpu_setup_model(vcpu);
2426
2427 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2428 if (MACHINE_HAS_ESOP)
2429 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2430 if (test_kvm_facility(vcpu->kvm, 9))
2431 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2432 if (test_kvm_facility(vcpu->kvm, 73))
2433 vcpu->arch.sie_block->ecb |= ECB_TE;
2434
2435 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2436 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2437 if (test_kvm_facility(vcpu->kvm, 130))
2438 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2439 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2440 if (sclp.has_cei)
2441 vcpu->arch.sie_block->eca |= ECA_CEI;
2442 if (sclp.has_ib)
2443 vcpu->arch.sie_block->eca |= ECA_IB;
2444 if (sclp.has_siif)
2445 vcpu->arch.sie_block->eca |= ECA_SII;
2446 if (sclp.has_sigpif)
2447 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2448 if (test_kvm_facility(vcpu->kvm, 129)) {
2449 vcpu->arch.sie_block->eca |= ECA_VX;
2450 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2451 }
2452 if (test_kvm_facility(vcpu->kvm, 139))
2453 vcpu->arch.sie_block->ecd |= ECD_MEF;
2454
2455 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2456 | SDNXC;
2457 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2458
2459 if (sclp.has_kss)
2460 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2461 else
2462 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2463
2464 if (vcpu->kvm->arch.use_cmma) {
2465 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2466 if (rc)
2467 return rc;
2468 }
2469 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2470 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2471
2472 kvm_s390_vcpu_crypto_setup(vcpu);
2473
2474 return rc;
2475 }
2476
2477 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2478 unsigned int id)
2479 {
2480 struct kvm_vcpu *vcpu;
2481 struct sie_page *sie_page;
2482 int rc = -EINVAL;
2483
2484 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2485 goto out;
2486
2487 rc = -ENOMEM;
2488
2489 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2490 if (!vcpu)
2491 goto out;
2492
2493 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2494 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2495 if (!sie_page)
2496 goto out_free_cpu;
2497
2498 vcpu->arch.sie_block = &sie_page->sie_block;
2499 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2500
2501 /* the real guest size will always be smaller than msl */
2502 vcpu->arch.sie_block->mso = 0;
2503 vcpu->arch.sie_block->msl = sclp.hamax;
2504
2505 vcpu->arch.sie_block->icpua = id;
2506 spin_lock_init(&vcpu->arch.local_int.lock);
2507 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2508 vcpu->arch.local_int.wq = &vcpu->wq;
2509 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2510 seqcount_init(&vcpu->arch.cputm_seqcount);
2511
2512 rc = kvm_vcpu_init(vcpu, kvm, id);
2513 if (rc)
2514 goto out_free_sie_block;
2515 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2516 vcpu->arch.sie_block);
2517 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2518
2519 return vcpu;
2520 out_free_sie_block:
2521 free_page((unsigned long)(vcpu->arch.sie_block));
2522 out_free_cpu:
2523 kmem_cache_free(kvm_vcpu_cache, vcpu);
2524 out:
2525 return ERR_PTR(rc);
2526 }
2527
2528 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2529 {
2530 return kvm_s390_vcpu_has_irq(vcpu, 0);
2531 }
2532
2533 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2534 {
2535 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2536 }
2537
2538 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2539 {
2540 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2541 exit_sie(vcpu);
2542 }
2543
2544 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2545 {
2546 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2547 }
2548
2549 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2550 {
2551 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2552 exit_sie(vcpu);
2553 }
2554
2555 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2556 {
2557 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2558 }
2559
2560 /*
2561 * Kick a guest cpu out of SIE and wait until SIE is not running.
2562 * If the CPU is not running (e.g. waiting as idle) the function will
2563 * return immediately.
 */
2564 void exit_sie(struct kvm_vcpu *vcpu)
2565 {
2566 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2567 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2568 cpu_relax();
2569 }
2570
2571 /* Kick a guest cpu out of SIE to process a request synchronously */
2572 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2573 {
2574 kvm_make_request(req, vcpu);
2575 kvm_s390_vcpu_request(vcpu);
2576 }
2577
2578 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2579 unsigned long end)
2580 {
2581 struct kvm *kvm = gmap->private;
2582 struct kvm_vcpu *vcpu;
2583 unsigned long prefix;
2584 int i;
2585
2586 if (gmap_is_shadow(gmap))
2587 return;
2588 if (start >= 1UL << 31)
2589 /* We are only interested in prefix pages */
2590 return;
2591 kvm_for_each_vcpu(i, vcpu, kvm) {
2592 /* match against both prefix pages */
2593 prefix = kvm_s390_get_prefix(vcpu);
2594 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2595 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2596 start, end);
2597 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2598 }
2599 }
2600 }
2601
2602 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2603 {
2604 /* kvm common code refers to this, but never calls it */
2605 BUG();
2606 return 0;
2607 }
2608
2609 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2610 struct kvm_one_reg *reg)
2611 {
2612 int r = -EINVAL;
2613
2614 switch (reg->id) {
2615 case KVM_REG_S390_TODPR:
2616 r = put_user(vcpu->arch.sie_block->todpr,
2617 (u32 __user *)reg->addr);
2618 break;
2619 case KVM_REG_S390_EPOCHDIFF:
2620 r = put_user(vcpu->arch.sie_block->epoch,
2621 (u64 __user *)reg->addr);
2622 break;
2623 case KVM_REG_S390_CPU_TIMER:
2624 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2625 (u64 __user *)reg->addr);
2626 break;
2627 case KVM_REG_S390_CLOCK_COMP:
2628 r = put_user(vcpu->arch.sie_block->ckc,
2629 (u64 __user *)reg->addr);
2630 break;
2631 case KVM_REG_S390_PFTOKEN:
2632 r = put_user(vcpu->arch.pfault_token,
2633 (u64 __user *)reg->addr);
2634 break;
2635 case KVM_REG_S390_PFCOMPARE:
2636 r = put_user(vcpu->arch.pfault_compare,
2637 (u64 __user *)reg->addr);
2638 break;
2639 case KVM_REG_S390_PFSELECT:
2640 r = put_user(vcpu->arch.pfault_select,
2641 (u64 __user *)reg->addr);
2642 break;
2643 case KVM_REG_S390_PP:
2644 r = put_user(vcpu->arch.sie_block->pp,
2645 (u64 __user *)reg->addr);
2646 break;
2647 case KVM_REG_S390_GBEA:
2648 r = put_user(vcpu->arch.sie_block->gbea,
2649 (u64 __user *)reg->addr);
2650 break;
2651 default:
2652 break;
2653 }
2654
2655 return r;
2656 }
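/*
 * Illustrative userspace sketch (not part of this file): reading one of the
 * registers handled above with the generic KVM_GET_ONE_REG ioctl on a VCPU
 * file descriptor. Assumes <linux/kvm.h> and <sys/ioctl.h>; the vcpu_fd
 * parameter and the error convention are made up for the example.
 */
static __u64 example_get_cpu_timer(int vcpu_fd)
{
        __u64 value = 0;
        struct kvm_one_reg reg = {
                .id = KVM_REG_S390_CPU_TIMER,
                .addr = (__u64)(unsigned long)&value,
        };

        /* on success the register content has been copied into value */
        if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
                return 0;
        return value;
}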
2657
2658 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2659 struct kvm_one_reg *reg)
2660 {
2661 int r = -EINVAL;
2662 __u64 val;
2663
2664 switch (reg->id) {
2665 case KVM_REG_S390_TODPR:
2666 r = get_user(vcpu->arch.sie_block->todpr,
2667 (u32 __user *)reg->addr);
2668 break;
2669 case KVM_REG_S390_EPOCHDIFF:
2670 r = get_user(vcpu->arch.sie_block->epoch,
2671 (u64 __user *)reg->addr);
2672 break;
2673 case KVM_REG_S390_CPU_TIMER:
2674 r = get_user(val, (u64 __user *)reg->addr);
2675 if (!r)
2676 kvm_s390_set_cpu_timer(vcpu, val);
2677 break;
2678 case KVM_REG_S390_CLOCK_COMP:
2679 r = get_user(vcpu->arch.sie_block->ckc,
2680 (u64 __user *)reg->addr);
2681 break;
2682 case KVM_REG_S390_PFTOKEN:
2683 r = get_user(vcpu->arch.pfault_token,
2684 (u64 __user *)reg->addr);
2685 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2686 kvm_clear_async_pf_completion_queue(vcpu);
2687 break;
2688 case KVM_REG_S390_PFCOMPARE:
2689 r = get_user(vcpu->arch.pfault_compare,
2690 (u64 __user *)reg->addr);
2691 break;
2692 case KVM_REG_S390_PFSELECT:
2693 r = get_user(vcpu->arch.pfault_select,
2694 (u64 __user *)reg->addr);
2695 break;
2696 case KVM_REG_S390_PP:
2697 r = get_user(vcpu->arch.sie_block->pp,
2698 (u64 __user *)reg->addr);
2699 break;
2700 case KVM_REG_S390_GBEA:
2701 r = get_user(vcpu->arch.sie_block->gbea,
2702 (u64 __user *)reg->addr);
2703 break;
2704 default:
2705 break;
2706 }
2707
2708 return r;
2709 }
2710
2711 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2712 {
2713 kvm_s390_vcpu_initial_reset(vcpu);
2714 return 0;
2715 }
2716
2717 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2718 {
2719 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2720 return 0;
2721 }
2722
2723 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2724 {
2725 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2726 return 0;
2727 }
2728
2729 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2730 struct kvm_sregs *sregs)
2731 {
2732 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2733 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2734 return 0;
2735 }
2736
2737 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2738 struct kvm_sregs *sregs)
2739 {
2740 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2741 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2742 return 0;
2743 }
2744
2745 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2746 {
2747 if (test_fp_ctl(fpu->fpc))
2748 return -EINVAL;
2749 vcpu->run->s.regs.fpc = fpu->fpc;
2750 if (MACHINE_HAS_VX)
2751 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2752 (freg_t *) fpu->fprs);
2753 else
2754 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2755 return 0;
2756 }
2757
2758 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2759 {
2760 /* make sure we have the latest values */
2761 save_fpu_regs();
2762 if (MACHINE_HAS_VX)
2763 convert_vx_to_fp((freg_t *) fpu->fprs,
2764 (__vector128 *) vcpu->run->s.regs.vrs);
2765 else
2766 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2767 fpu->fpc = vcpu->run->s.regs.fpc;
2768 return 0;
2769 }
2770
2771 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2772 {
2773 int rc = 0;
2774
2775 if (!is_vcpu_stopped(vcpu))
2776 rc = -EBUSY;
2777 else {
2778 vcpu->run->psw_mask = psw.mask;
2779 vcpu->run->psw_addr = psw.addr;
2780 }
2781 return rc;
2782 }
2783
2784 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2785 struct kvm_translation *tr)
2786 {
2787 return -EINVAL; /* not implemented yet */
2788 }
2789
2790 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2791 KVM_GUESTDBG_USE_HW_BP | \
2792 KVM_GUESTDBG_ENABLE)
2793
2794 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2795 struct kvm_guest_debug *dbg)
2796 {
2797 int rc = 0;
2798
2799 vcpu->guest_debug = 0;
2800 kvm_s390_clear_bp_data(vcpu);
2801
2802 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2803 return -EINVAL;
2804 if (!sclp.has_gpere)
2805 return -EINVAL;
2806
2807 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2808 vcpu->guest_debug = dbg->control;
2809 /* enforce guest PER */
2810 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2811
2812 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2813 rc = kvm_s390_import_bp_data(vcpu, dbg);
2814 } else {
2815 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2816 vcpu->arch.guestdbg.last_bp = 0;
2817 }
2818
2819 if (rc) {
2820 vcpu->guest_debug = 0;
2821 kvm_s390_clear_bp_data(vcpu);
2822 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2823 }
2824
2825 return rc;
2826 }
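/*
 * Illustrative userspace sketch (not part of this file): enabling single-step
 * debugging through KVM_SET_GUEST_DEBUG, which is serviced by the handler
 * above. Assumes <linux/kvm.h>, <sys/ioctl.h> and <string.h>; vcpu_fd is an
 * assumption.
 */
static int example_enable_singlestep(int vcpu_fd)
{
        struct kvm_guest_debug dbg;

        memset(&dbg, 0, sizeof(dbg));
        dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
        /* hardware breakpoints would also set KVM_GUESTDBG_USE_HW_BP and
         * fill dbg.arch with the breakpoint data */
        return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}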
2827
2828 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2829 struct kvm_mp_state *mp_state)
2830 {
2831 /* CHECK_STOP and LOAD are not supported yet */
2832 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2833 KVM_MP_STATE_OPERATING;
2834 }
2835
2836 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2837 struct kvm_mp_state *mp_state)
2838 {
2839 int rc = 0;
2840
2841 /* user space knows about this interface - let it control the state */
2842 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2843
2844 switch (mp_state->mp_state) {
2845 case KVM_MP_STATE_STOPPED:
2846 kvm_s390_vcpu_stop(vcpu);
2847 break;
2848 case KVM_MP_STATE_OPERATING:
2849 kvm_s390_vcpu_start(vcpu);
2850 break;
2851 case KVM_MP_STATE_LOAD:
2852 case KVM_MP_STATE_CHECK_STOP:
2853 /* fall through - CHECK_STOP and LOAD are not supported yet */
2854 default:
2855 rc = -ENXIO;
2856 }
2857
2858 return rc;
2859 }
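/*
 * Illustrative userspace sketch (not part of this file): stopping or starting
 * a VCPU through KVM_SET_MP_STATE, which lands in the handler above and hands
 * CPU state control to userspace. Assumes <linux/kvm.h> and <sys/ioctl.h>;
 * vcpu_fd is an assumption.
 */
static int example_set_vcpu_stopped(int vcpu_fd, int stopped)
{
        struct kvm_mp_state mp_state;

        mp_state.mp_state = stopped ? KVM_MP_STATE_STOPPED
                                    : KVM_MP_STATE_OPERATING;
        return ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp_state);
}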
2860
2861 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2862 {
2863 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2864 }
2865
2866 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2867 {
2868 retry:
2869 kvm_s390_vcpu_request_handled(vcpu);
2870 if (!kvm_request_pending(vcpu))
2871 return 0;
2872 /*
2873 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2874 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2875 * This ensures that the ipte instruction for this request has
2876 * already finished. We might race against a second unmapper that
2877 * wants to set the blocking bit. Let's just retry the request loop.
2878 */
2879 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2880 int rc;
2881 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2882 kvm_s390_get_prefix(vcpu),
2883 PAGE_SIZE * 2, PROT_WRITE);
2884 if (rc) {
2885 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2886 return rc;
2887 }
2888 goto retry;
2889 }
2890
2891 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2892 vcpu->arch.sie_block->ihcpu = 0xffff;
2893 goto retry;
2894 }
2895
2896 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2897 if (!ibs_enabled(vcpu)) {
2898 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2899 atomic_or(CPUSTAT_IBS,
2900 &vcpu->arch.sie_block->cpuflags);
2901 }
2902 goto retry;
2903 }
2904
2905 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2906 if (ibs_enabled(vcpu)) {
2907 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2908 atomic_andnot(CPUSTAT_IBS,
2909 &vcpu->arch.sie_block->cpuflags);
2910 }
2911 goto retry;
2912 }
2913
2914 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2915 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2916 goto retry;
2917 }
2918
2919 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2920 /*
2921 * Disable CMMA virtualization; we will emulate the ESSA
2922 * instruction manually, in order to provide additional
2923 * functionalities needed for live migration.
2924 */
2925 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2926 goto retry;
2927 }
2928
2929 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2930 /*
2931 * Re-enable CMMA virtualization if CMMA is available and
2932 * was used.
2933 */
2934 if ((vcpu->kvm->arch.use_cmma) &&
2935 (vcpu->kvm->mm->context.use_cmma))
2936 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2937 goto retry;
2938 }
2939
2940 /* nothing to do, just clear the request */
2941 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2942
2943 return 0;
2944 }
2945
2946 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2947 const struct kvm_s390_vm_tod_clock *gtod)
2948 {
2949 struct kvm_vcpu *vcpu;
2950 struct kvm_s390_tod_clock_ext htod;
2951 int i;
2952
2953 mutex_lock(&kvm->lock);
2954 preempt_disable();
2955
2956 get_tod_clock_ext((char *)&htod);
2957
2958 kvm->arch.epoch = gtod->tod - htod.tod;
2959 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2960
2961 if (kvm->arch.epoch > gtod->tod)
2962 kvm->arch.epdx -= 1;
2963
2964 kvm_s390_vcpu_block_all(kvm);
2965 kvm_for_each_vcpu(i, vcpu, kvm) {
2966 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2967 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
2968 }
2969
2970 kvm_s390_vcpu_unblock_all(kvm);
2971 preempt_enable();
2972 mutex_unlock(&kvm->lock);
2973 }
2974
2975 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2976 {
2977 struct kvm_vcpu *vcpu;
2978 int i;
2979
2980 mutex_lock(&kvm->lock);
2981 preempt_disable();
2982 kvm->arch.epoch = tod - get_tod_clock();
2983 kvm_s390_vcpu_block_all(kvm);
2984 kvm_for_each_vcpu(i, vcpu, kvm)
2985 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2986 kvm_s390_vcpu_unblock_all(kvm);
2987 preempt_enable();
2988 mutex_unlock(&kvm->lock);
2989 }
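/*
 * Illustrative userspace sketch (not part of this file): how userspace
 * typically sets the guest TOD, via the VM device attribute interface whose
 * handlers end up calling kvm_s390_set_tod_clock() above. Assumes
 * <linux/kvm.h>, <sys/ioctl.h> and <string.h>; vm_fd and tod are assumptions.
 */
static int example_set_guest_tod(int vm_fd, __u64 tod)
{
        struct kvm_device_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.group = KVM_S390_VM_TOD;
        attr.attr = KVM_S390_VM_TOD_LOW;
        attr.addr = (__u64)(unsigned long)&tod;
        return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
}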
2990
2991 /**
2992 * kvm_arch_fault_in_page - fault-in guest page if necessary
2993 * @vcpu: The corresponding virtual cpu
2994 * @gpa: Guest physical address
2995 * @writable: Whether the page should be writable or not
2996 *
2997 * Make sure that a guest page has been faulted-in on the host.
2998 *
2999 * Return: Zero on success, negative error code otherwise.
3000 */
3001 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3002 {
3003 return gmap_fault(vcpu->arch.gmap, gpa,
3004 writable ? FAULT_FLAG_WRITE : 0);
3005 }
3006
3007 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3008 unsigned long token)
3009 {
3010 struct kvm_s390_interrupt inti;
3011 struct kvm_s390_irq irq;
3012
3013 if (start_token) {
3014 irq.u.ext.ext_params2 = token;
3015 irq.type = KVM_S390_INT_PFAULT_INIT;
3016 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3017 } else {
3018 inti.type = KVM_S390_INT_PFAULT_DONE;
3019 inti.parm64 = token;
3020 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3021 }
3022 }
3023
3024 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3025 struct kvm_async_pf *work)
3026 {
3027 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3028 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3029 }
3030
3031 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3032 struct kvm_async_pf *work)
3033 {
3034 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3035 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3036 }
3037
3038 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3039 struct kvm_async_pf *work)
3040 {
3041 /* s390 will always inject the page directly */
3042 }
3043
3044 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3045 {
3046 /*
3047 * s390 will always inject the page directly,
3048 * but we still want check_async_completion to clean up
3049 */
3050 return true;
3051 }
3052
3053 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3054 {
3055 hva_t hva;
3056 struct kvm_arch_async_pf arch;
3057 int rc;
3058
3059 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3060 return 0;
3061 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3062 vcpu->arch.pfault_compare)
3063 return 0;
3064 if (psw_extint_disabled(vcpu))
3065 return 0;
3066 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3067 return 0;
3068 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3069 return 0;
3070 if (!vcpu->arch.gmap->pfault_enabled)
3071 return 0;
3072
3073 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3074 hva += current->thread.gmap_addr & ~PAGE_MASK;
3075 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3076 return 0;
3077
3078 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3079 return rc;
3080 }
3081
3082 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3083 {
3084 int rc, cpuflags;
3085
3086 /*
3087 * On s390 notifications for arriving pages will be delivered directly
3088 * to the guest but the housekeeping for completed pfaults is
3089 * handled outside the worker.
3090 */
3091 kvm_check_async_pf_completion(vcpu);
3092
3093 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3094 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3095
3096 if (need_resched())
3097 schedule();
3098
3099 if (test_cpu_flag(CIF_MCCK_PENDING))
3100 s390_handle_mcck();
3101
3102 if (!kvm_is_ucontrol(vcpu->kvm)) {
3103 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3104 if (rc)
3105 return rc;
3106 }
3107
3108 rc = kvm_s390_handle_requests(vcpu);
3109 if (rc)
3110 return rc;
3111
3112 if (guestdbg_enabled(vcpu)) {
3113 kvm_s390_backup_guest_per_regs(vcpu);
3114 kvm_s390_patch_guest_per_regs(vcpu);
3115 }
3116
3117 vcpu->arch.sie_block->icptcode = 0;
3118 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3119 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3120 trace_kvm_s390_sie_enter(vcpu, cpuflags);
3121
3122 return 0;
3123 }
3124
3125 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3126 {
3127 struct kvm_s390_pgm_info pgm_info = {
3128 .code = PGM_ADDRESSING,
3129 };
3130 u8 opcode, ilen;
3131 int rc;
3132
3133 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3134 trace_kvm_s390_sie_fault(vcpu);
3135
3136 /*
3137 * We want to inject an addressing exception, which is defined as a
3138 * suppressing or terminating exception. However, since we came here
3139 * by a DAT access exception, the PSW still points to the faulting
3140 * instruction since DAT exceptions are nullifying. So we've got
3141 * to look up the current opcode to get the length of the instruction
3142 * to be able to forward the PSW.
3143 */
3144 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3145 ilen = insn_length(opcode);
3146 if (rc < 0) {
3147 return rc;
3148 } else if (rc) {
3149 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3150 * Forward by arbitrary ilc, injection will take care of
3151 * nullification if necessary.
3152 */
3153 pgm_info = vcpu->arch.pgm;
3154 ilen = 4;
3155 }
3156 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3157 kvm_s390_forward_psw(vcpu, ilen);
3158 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3159 }
3160
3161 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3162 {
3163 struct mcck_volatile_info *mcck_info;
3164 struct sie_page *sie_page;
3165
3166 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3167 vcpu->arch.sie_block->icptcode);
3168 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3169
3170 if (guestdbg_enabled(vcpu))
3171 kvm_s390_restore_guest_per_regs(vcpu);
3172
3173 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3174 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3175
3176 if (exit_reason == -EINTR) {
3177 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3178 sie_page = container_of(vcpu->arch.sie_block,
3179 struct sie_page, sie_block);
3180 mcck_info = &sie_page->mcck_info;
3181 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3182 return 0;
3183 }
3184
3185 if (vcpu->arch.sie_block->icptcode > 0) {
3186 int rc = kvm_handle_sie_intercept(vcpu);
3187
3188 if (rc != -EOPNOTSUPP)
3189 return rc;
3190 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3191 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3192 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3193 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3194 return -EREMOTE;
3195 } else if (exit_reason != -EFAULT) {
3196 vcpu->stat.exit_null++;
3197 return 0;
3198 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3199 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3200 vcpu->run->s390_ucontrol.trans_exc_code =
3201 current->thread.gmap_addr;
3202 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3203 return -EREMOTE;
3204 } else if (current->thread.gmap_pfault) {
3205 trace_kvm_s390_major_guest_pfault(vcpu);
3206 current->thread.gmap_pfault = 0;
3207 if (kvm_arch_setup_async_pf(vcpu))
3208 return 0;
3209 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3210 }
3211 return vcpu_post_run_fault_in_sie(vcpu);
3212 }
3213
3214 static int __vcpu_run(struct kvm_vcpu *vcpu)
3215 {
3216 int rc, exit_reason;
3217
3218 /*
3219 * We try to hold kvm->srcu during most of vcpu_run (except when
3220 * running the guest), so that memslots (and other stuff) are protected
3221 */
3222 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3223
3224 do {
3225 rc = vcpu_pre_run(vcpu);
3226 if (rc)
3227 break;
3228
3229 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3230 /*
3231 * As PF_VCPU will be used in fault handler, between
3232 * guest_enter and guest_exit should be no uaccess.
3233 */
3234 local_irq_disable();
3235 guest_enter_irqoff();
3236 __disable_cpu_timer_accounting(vcpu);
3237 local_irq_enable();
3238 exit_reason = sie64a(vcpu->arch.sie_block,
3239 vcpu->run->s.regs.gprs);
3240 local_irq_disable();
3241 __enable_cpu_timer_accounting(vcpu);
3242 guest_exit_irqoff();
3243 local_irq_enable();
3244 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3245
3246 rc = vcpu_post_run(vcpu, exit_reason);
3247 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3248
3249 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3250 return rc;
3251 }
3252
3253 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3254 {
3255 struct runtime_instr_cb *riccb;
3256 struct gs_cb *gscb;
3257
3258 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3259 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3260 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3261 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3262 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3263 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3264 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3265 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3266 /* some control register changes require a tlb flush */
3267 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3268 }
3269 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3270 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3271 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3272 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3273 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3274 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3275 }
3276 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3277 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3278 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3279 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3280 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3281 kvm_clear_async_pf_completion_queue(vcpu);
3282 }
3283 /*
3284 * If userspace sets the riccb (e.g. after migration) to a valid state,
3285 * we should enable RI here instead of doing the lazy enablement.
3286 */
3287 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3288 test_kvm_facility(vcpu->kvm, 64) &&
3289 riccb->v &&
3290 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3291 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3292 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3293 }
3294 /*
3295 * If userspace sets the gscb (e.g. after migration) to non-zero,
3296 * we should enable GS here instead of doing the lazy enablement.
3297 */
3298 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3299 test_kvm_facility(vcpu->kvm, 133) &&
3300 gscb->gssm &&
3301 !vcpu->arch.gs_enabled) {
3302 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3303 vcpu->arch.sie_block->ecb |= ECB_GS;
3304 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3305 vcpu->arch.gs_enabled = 1;
3306 }
3307 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3308 test_kvm_facility(vcpu->kvm, 82)) {
3309 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3310 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3311 }
3312 save_access_regs(vcpu->arch.host_acrs);
3313 restore_access_regs(vcpu->run->s.regs.acrs);
3314 /* save host (userspace) fprs/vrs */
3315 save_fpu_regs();
3316 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3317 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3318 if (MACHINE_HAS_VX)
3319 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3320 else
3321 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3322 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3323 if (test_fp_ctl(current->thread.fpu.fpc))
3324 /* User space provided an invalid FPC, let's clear it */
3325 current->thread.fpu.fpc = 0;
3326 if (MACHINE_HAS_GS) {
3327 preempt_disable();
3328 __ctl_set_bit(2, 4);
3329 if (current->thread.gs_cb) {
3330 vcpu->arch.host_gscb = current->thread.gs_cb;
3331 save_gs_cb(vcpu->arch.host_gscb);
3332 }
3333 if (vcpu->arch.gs_enabled) {
3334 current->thread.gs_cb = (struct gs_cb *)
3335 &vcpu->run->s.regs.gscb;
3336 restore_gs_cb(current->thread.gs_cb);
3337 }
3338 preempt_enable();
3339 }
3340
3341 kvm_run->kvm_dirty_regs = 0;
3342 }
3343
3344 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3345 {
3346 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3347 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3348 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3349 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3350 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3351 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3352 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3353 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3354 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3355 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3356 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3357 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3358 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3359 save_access_regs(vcpu->run->s.regs.acrs);
3360 restore_access_regs(vcpu->arch.host_acrs);
3361 /* Save guest register state */
3362 save_fpu_regs();
3363 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3364 /* Restore will be done lazily at return */
3365 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3366 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3367 if (MACHINE_HAS_GS) {
3368 __ctl_set_bit(2, 4);
3369 if (vcpu->arch.gs_enabled)
3370 save_gs_cb(current->thread.gs_cb);
3371 preempt_disable();
3372 current->thread.gs_cb = vcpu->arch.host_gscb;
3373 restore_gs_cb(vcpu->arch.host_gscb);
3374 preempt_enable();
3375 if (!vcpu->arch.host_gscb)
3376 __ctl_clear_bit(2, 4);
3377 vcpu->arch.host_gscb = NULL;
3378 }
3379
3380 }
3381
3382 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3383 {
3384 int rc;
3385
3386 if (kvm_run->immediate_exit)
3387 return -EINTR;
3388
3389 if (guestdbg_exit_pending(vcpu)) {
3390 kvm_s390_prepare_debug_exit(vcpu);
3391 return 0;
3392 }
3393
3394 kvm_sigset_activate(vcpu);
3395
3396 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3397 kvm_s390_vcpu_start(vcpu);
3398 } else if (is_vcpu_stopped(vcpu)) {
3399 pr_err_ratelimited("can't run stopped vcpu %d\n",
3400 vcpu->vcpu_id);
3401 return -EINVAL;
3402 }
3403
3404 sync_regs(vcpu, kvm_run);
3405 enable_cpu_timer_accounting(vcpu);
3406
3407 might_fault();
3408 rc = __vcpu_run(vcpu);
3409
3410 if (signal_pending(current) && !rc) {
3411 kvm_run->exit_reason = KVM_EXIT_INTR;
3412 rc = -EINTR;
3413 }
3414
3415 if (guestdbg_exit_pending(vcpu) && !rc) {
3416 kvm_s390_prepare_debug_exit(vcpu);
3417 rc = 0;
3418 }
3419
3420 if (rc == -EREMOTE) {
3421 /* userspace support is needed, kvm_run has been prepared */
3422 rc = 0;
3423 }
3424
3425 disable_cpu_timer_accounting(vcpu);
3426 store_regs(vcpu, kvm_run);
3427
3428 kvm_sigset_deactivate(vcpu);
3429
3430 vcpu->stat.exit_userspace++;
3431 return rc;
3432 }
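/*
 * Illustrative userspace sketch (not part of this file): driving the ioctl
 * above once. The kvm_run structure is assumed to have been mmap()ed from the
 * VCPU fd (size from KVM_GET_VCPU_MMAP_SIZE). Assumes <linux/kvm.h>,
 * <sys/ioctl.h> and <errno.h>; vcpu_fd is an assumption.
 */
static int example_run_once(int vcpu_fd, struct kvm_run *run)
{
        if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
                return -errno;  /* -EINTR means a signal was pending */
        if (run->exit_reason == KVM_EXIT_S390_SIEIC) {
                /* intercept that must be handled by userspace, details in
                 * run->s390_sieic.icptcode / .ipa / .ipb (prepared above) */
        }
        return run->exit_reason;
}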
3433
3434 /*
3435 * store status at address
3436 * we have two special cases:
3437 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3438 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3439 */
3440 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3441 {
3442 unsigned char archmode = 1;
3443 freg_t fprs[NUM_FPRS];
3444 unsigned int px;
3445 u64 clkcomp, cputm;
3446 int rc;
3447
3448 px = kvm_s390_get_prefix(vcpu);
3449 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3450 if (write_guest_abs(vcpu, 163, &archmode, 1))
3451 return -EFAULT;
3452 gpa = 0;
3453 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3454 if (write_guest_real(vcpu, 163, &archmode, 1))
3455 return -EFAULT;
3456 gpa = px;
3457 } else
3458 gpa -= __LC_FPREGS_SAVE_AREA;
3459
3460 /* manually convert vector registers if necessary */
3461 if (MACHINE_HAS_VX) {
3462 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3463 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3464 fprs, 128);
3465 } else {
3466 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3467 vcpu->run->s.regs.fprs, 128);
3468 }
3469 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3470 vcpu->run->s.regs.gprs, 128);
3471 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3472 &vcpu->arch.sie_block->gpsw, 16);
3473 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3474 &px, 4);
3475 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3476 &vcpu->run->s.regs.fpc, 4);
3477 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3478 &vcpu->arch.sie_block->todpr, 4);
3479 cputm = kvm_s390_get_cpu_timer(vcpu);
3480 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3481 &cputm, 8);
3482 clkcomp = vcpu->arch.sie_block->ckc >> 8;
3483 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3484 &clkcomp, 8);
3485 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3486 &vcpu->run->s.regs.acrs, 64);
3487 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3488 &vcpu->arch.sie_block->gcr, 128);
3489 return rc ? -EFAULT : 0;
3490 }
3491
3492 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3493 {
3494 /*
3495 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3496 * switch in the run ioctl. Let's update our copies before we save
3497 * them into the save area
3498 */
3499 save_fpu_regs();
3500 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3501 save_access_regs(vcpu->run->s.regs.acrs);
3502
3503 return kvm_s390_store_status_unloaded(vcpu, addr);
3504 }
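/*
 * Illustrative userspace sketch (not part of this file): asking KVM to store
 * the architected CPU status at the prefix area, one of the two special cases
 * described above. Assumes <linux/kvm.h> and <sys/ioctl.h>; the fallback
 * define is an assumption for the case that the constant is not visible to
 * userspace headers.
 */
#ifndef KVM_S390_STORE_STATUS_PREFIXED
#define KVM_S390_STORE_STATUS_PREFIXED (-2UL)
#endif

static int example_store_status_at_prefix(int vcpu_fd)
{
        /* the target address is passed directly as the ioctl argument */
        return ioctl(vcpu_fd, KVM_S390_STORE_STATUS,
                     KVM_S390_STORE_STATUS_PREFIXED);
}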
3505
3506 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3507 {
3508 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3509 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3510 }
3511
3512 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3513 {
3514 unsigned int i;
3515 struct kvm_vcpu *vcpu;
3516
3517 kvm_for_each_vcpu(i, vcpu, kvm) {
3518 __disable_ibs_on_vcpu(vcpu);
3519 }
3520 }
3521
3522 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3523 {
3524 if (!sclp.has_ibs)
3525 return;
3526 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3527 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3528 }
3529
3530 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3531 {
3532 int i, online_vcpus, started_vcpus = 0;
3533
3534 if (!is_vcpu_stopped(vcpu))
3535 return;
3536
3537 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3538 /* Only one cpu at a time may enter/leave the STOPPED state. */
3539 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3540 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3541
3542 for (i = 0; i < online_vcpus; i++) {
3543 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3544 started_vcpus++;
3545 }
3546
3547 if (started_vcpus == 0) {
3548 /* we're the only active VCPU -> speed it up */
3549 __enable_ibs_on_vcpu(vcpu);
3550 } else if (started_vcpus == 1) {
3551 /*
3552 * As we are starting a second VCPU, we have to disable
3553 * the IBS facility on all VCPUs to remove potentially
3554 * outstanding ENABLE requests.
3555 */
3556 __disable_ibs_on_all_vcpus(vcpu->kvm);
3557 }
3558
3559 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3560 /*
3561 * Another VCPU might have used IBS while we were offline.
3562 * Let's play safe and flush the VCPU at startup.
3563 */
3564 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3565 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3566 return;
3567 }
3568
3569 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3570 {
3571 int i, online_vcpus, started_vcpus = 0;
3572 struct kvm_vcpu *started_vcpu = NULL;
3573
3574 if (is_vcpu_stopped(vcpu))
3575 return;
3576
3577 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3578 /* Only one cpu at a time may enter/leave the STOPPED state. */
3579 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3580 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3581
3582 /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3583 kvm_s390_clear_stop_irq(vcpu);
3584
3585 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3586 __disable_ibs_on_vcpu(vcpu);
3587
3588 for (i = 0; i < online_vcpus; i++) {
3589 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3590 started_vcpus++;
3591 started_vcpu = vcpu->kvm->vcpus[i];
3592 }
3593 }
3594
3595 if (started_vcpus == 1) {
3596 /*
3597 * As we only have one VCPU left, we want to enable the
3598 * IBS facility for that VCPU to speed it up.
3599 */
3600 __enable_ibs_on_vcpu(started_vcpu);
3601 }
3602
3603 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3604 return;
3605 }
3606
3607 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3608 struct kvm_enable_cap *cap)
3609 {
3610 int r;
3611
3612 if (cap->flags)
3613 return -EINVAL;
3614
3615 switch (cap->cap) {
3616 case KVM_CAP_S390_CSS_SUPPORT:
3617 if (!vcpu->kvm->arch.css_support) {
3618 vcpu->kvm->arch.css_support = 1;
3619 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3620 trace_kvm_s390_enable_css(vcpu->kvm);
3621 }
3622 r = 0;
3623 break;
3624 default:
3625 r = -EINVAL;
3626 break;
3627 }
3628 return r;
3629 }
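/*
 * Illustrative userspace sketch (not part of this file): enabling the CSS
 * support capability handled above with KVM_ENABLE_CAP on a VCPU fd. Assumes
 * <linux/kvm.h>, <sys/ioctl.h> and <string.h>; vcpu_fd is an assumption.
 */
static int example_enable_css(int vcpu_fd)
{
        struct kvm_enable_cap cap;

        memset(&cap, 0, sizeof(cap));   /* flags and args stay zero here */
        cap.cap = KVM_CAP_S390_CSS_SUPPORT;
        return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}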
3630
3631 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3632 struct kvm_s390_mem_op *mop)
3633 {
3634 void __user *uaddr = (void __user *)mop->buf;
3635 void *tmpbuf = NULL;
3636 int r, srcu_idx;
3637 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3638 | KVM_S390_MEMOP_F_CHECK_ONLY;
3639
3640 if (mop->flags & ~supported_flags)
3641 return -EINVAL;
3642
3643 if (mop->size > MEM_OP_MAX_SIZE)
3644 return -E2BIG;
3645
3646 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3647 tmpbuf = vmalloc(mop->size);
3648 if (!tmpbuf)
3649 return -ENOMEM;
3650 }
3651
3652 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3653
3654 switch (mop->op) {
3655 case KVM_S390_MEMOP_LOGICAL_READ:
3656 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3657 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3658 mop->size, GACC_FETCH);
3659 break;
3660 }
3661 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3662 if (r == 0) {
3663 if (copy_to_user(uaddr, tmpbuf, mop->size))
3664 r = -EFAULT;
3665 }
3666 break;
3667 case KVM_S390_MEMOP_LOGICAL_WRITE:
3668 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3669 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3670 mop->size, GACC_STORE);
3671 break;
3672 }
3673 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3674 r = -EFAULT;
3675 break;
3676 }
3677 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3678 break;
3679 default:
3680 r = -EINVAL;
3681 }
3682
3683 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3684
3685 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3686 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3687
3688 vfree(tmpbuf);
3689 return r;
3690 }
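/*
 * Illustrative userspace sketch (not part of this file): reading guest memory
 * with KVM_S390_MEM_OP as implemented above. A positive return value is a
 * program interruption code from the access. Assumes <linux/kvm.h>,
 * <sys/ioctl.h> and <string.h>; vcpu_fd, gaddr, buf and len are assumptions.
 */
static int example_memop_read(int vcpu_fd, __u64 gaddr, void *buf, __u32 len)
{
        struct kvm_s390_mem_op op;

        memset(&op, 0, sizeof(op));     /* the reserved bytes must be zero */
        op.op = KVM_S390_MEMOP_LOGICAL_READ;
        op.gaddr = gaddr;
        op.buf = (__u64)(unsigned long)buf;
        op.size = len;                  /* at most MEM_OP_MAX_SIZE bytes */
        op.ar = 0;                      /* use access register 0 */
        return ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
}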
3691
3692 long kvm_arch_vcpu_ioctl(struct file *filp,
3693 unsigned int ioctl, unsigned long arg)
3694 {
3695 struct kvm_vcpu *vcpu = filp->private_data;
3696 void __user *argp = (void __user *)arg;
3697 int idx;
3698 long r;
3699
3700 switch (ioctl) {
3701 case KVM_S390_IRQ: {
3702 struct kvm_s390_irq s390irq;
3703
3704 r = -EFAULT;
3705 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3706 break;
3707 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3708 break;
3709 }
3710 case KVM_S390_INTERRUPT: {
3711 struct kvm_s390_interrupt s390int;
3712 struct kvm_s390_irq s390irq;
3713
3714 r = -EFAULT;
3715 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3716 break;
3717 if (s390int_to_s390irq(&s390int, &s390irq))
3718 return -EINVAL;
3719 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3720 break;
3721 }
3722 case KVM_S390_STORE_STATUS:
3723 idx = srcu_read_lock(&vcpu->kvm->srcu);
3724 r = kvm_s390_vcpu_store_status(vcpu, arg);
3725 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3726 break;
3727 case KVM_S390_SET_INITIAL_PSW: {
3728 psw_t psw;
3729
3730 r = -EFAULT;
3731 if (copy_from_user(&psw, argp, sizeof(psw)))
3732 break;
3733 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3734 break;
3735 }
3736 case KVM_S390_INITIAL_RESET:
3737 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3738 break;
3739 case KVM_SET_ONE_REG:
3740 case KVM_GET_ONE_REG: {
3741 struct kvm_one_reg reg;
3742 r = -EFAULT;
3743 if (copy_from_user(&reg, argp, sizeof(reg)))
3744 break;
3745 if (ioctl == KVM_SET_ONE_REG)
3746 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3747 else
3748 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3749 break;
3750 }
3751 #ifdef CONFIG_KVM_S390_UCONTROL
3752 case KVM_S390_UCAS_MAP: {
3753 struct kvm_s390_ucas_mapping ucasmap;
3754
3755 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3756 r = -EFAULT;
3757 break;
3758 }
3759
3760 if (!kvm_is_ucontrol(vcpu->kvm)) {
3761 r = -EINVAL;
3762 break;
3763 }
3764
3765 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3766 ucasmap.vcpu_addr, ucasmap.length);
3767 break;
3768 }
3769 case KVM_S390_UCAS_UNMAP: {
3770 struct kvm_s390_ucas_mapping ucasmap;
3771
3772 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3773 r = -EFAULT;
3774 break;
3775 }
3776
3777 if (!kvm_is_ucontrol(vcpu->kvm)) {
3778 r = -EINVAL;
3779 break;
3780 }
3781
3782 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3783 ucasmap.length);
3784 break;
3785 }
3786 #endif
3787 case KVM_S390_VCPU_FAULT: {
3788 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3789 break;
3790 }
3791 case KVM_ENABLE_CAP:
3792 {
3793 struct kvm_enable_cap cap;
3794 r = -EFAULT;
3795 if (copy_from_user(&cap, argp, sizeof(cap)))
3796 break;
3797 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3798 break;
3799 }
3800 case KVM_S390_MEM_OP: {
3801 struct kvm_s390_mem_op mem_op;
3802
3803 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3804 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3805 else
3806 r = -EFAULT;
3807 break;
3808 }
3809 case KVM_S390_SET_IRQ_STATE: {
3810 struct kvm_s390_irq_state irq_state;
3811
3812 r = -EFAULT;
3813 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3814 break;
3815 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3816 irq_state.len == 0 ||
3817 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3818 r = -EINVAL;
3819 break;
3820 }
3821 /* do not use irq_state.flags; it would break old QEMUs */
3822 r = kvm_s390_set_irq_state(vcpu,
3823 (void __user *) irq_state.buf,
3824 irq_state.len);
3825 break;
3826 }
3827 case KVM_S390_GET_IRQ_STATE: {
3828 struct kvm_s390_irq_state irq_state;
3829
3830 r = -EFAULT;
3831 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3832 break;
3833 if (irq_state.len == 0) {
3834 r = -EINVAL;
3835 break;
3836 }
3837 /* do not use irq_state.flags; it would break old QEMUs */
3838 r = kvm_s390_get_irq_state(vcpu,
3839 (__u8 __user *) irq_state.buf,
3840 irq_state.len);
3841 break;
3842 }
3843 default:
3844 r = -ENOTTY;
3845 }
3846 return r;
3847 }
3848
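/*
 * Illustrative userspace sketch, not part of this file: the KVM_S390_IRQ
 * case above consumes a struct kvm_s390_irq prepared by userspace. A
 * restart interrupt carries no payload in irq.u; inject_restart() and
 * vcpu_fd are assumed names.
 */
static int inject_restart(int vcpu_fd)
{
	struct kvm_s390_irq irq = {};

	irq.type = KVM_S390_RESTART;

	return ioctl(vcpu_fd, KVM_S390_IRQ, &irq);
}
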
3849 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3850 {
3851 #ifdef CONFIG_KVM_S390_UCONTROL
3852 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3853 && (kvm_is_ucontrol(vcpu->kvm))) {
3854 vmf->page = virt_to_page(vcpu->arch.sie_block);
3855 get_page(vmf->page);
3856 return 0;
3857 }
3858 #endif
3859 return VM_FAULT_SIGBUS;
3860 }
3861
3862 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3863 unsigned long npages)
3864 {
3865 return 0;
3866 }
3867
3868 /* Section: memory related */
3869 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3870 struct kvm_memory_slot *memslot,
3871 const struct kvm_userspace_memory_region *mem,
3872 enum kvm_mr_change change)
3873 {
3874 /* A few sanity checks. Memory slots have to start and end at a segment
3875 boundary (1MB). The backing memory in userland may be fragmented into
3876 several different VMAs. It is okay to mmap() and munmap() ranges in this
3877 slot at any time after this call. */
3878
3879 if (mem->userspace_addr & 0xffffful)
3880 return -EINVAL;
3881
3882 if (mem->memory_size & 0xffffful)
3883 return -EINVAL;
3884
3885 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3886 return -EINVAL;
3887
3888 return 0;
3889 }
3890
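/*
 * Illustrative userspace sketch, not part of this file: the checks above
 * require the backing memory passed to KVM_SET_USER_MEMORY_REGION to start
 * on a 1MB boundary and to be a multiple of 1MB in size. vm_fd, slot 0 and
 * the over-allocate-and-align trick are assumptions.
 */
#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#define SEG_SIZE (1UL << 20)	/* 1MB segment */

static int add_guest_ram(int vm_fd)
{
	unsigned long size = 16 * SEG_SIZE;
	struct kvm_userspace_memory_region region;
	void *raw;

	/* Over-allocate by one segment so the start can be rounded up to 1MB. */
	raw = mmap(NULL, size + SEG_SIZE, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (raw == MAP_FAILED)
		return -1;

	region.slot = 0;
	region.flags = 0;
	region.guest_phys_addr = 0;	/* must stay below kvm->arch.mem_limit */
	region.memory_size = size;	/* multiple of 1MB */
	region.userspace_addr = ((uintptr_t)raw + SEG_SIZE - 1) & ~(SEG_SIZE - 1);

	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}
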
3891 void kvm_arch_commit_memory_region(struct kvm *kvm,
3892 const struct kvm_userspace_memory_region *mem,
3893 const struct kvm_memory_slot *old,
3894 const struct kvm_memory_slot *new,
3895 enum kvm_mr_change change)
3896 {
3897 int rc;
3898
3899 /* If the basics of the memslot do not change, we do not want
3900 * to update the gmap. Every update causes several unnecessary
3901 * segment translation exceptions. This is usually handled just
3902 * fine by the normal fault handler + gmap, but it will also
3903 * cause faults on the prefix page of running guest CPUs.
3904 */
3905 if (old->userspace_addr == mem->userspace_addr &&
3906 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3907 old->npages * PAGE_SIZE == mem->memory_size)
3908 return;
3909
3910 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3911 mem->guest_phys_addr, mem->memory_size);
3912 if (rc)
3913 pr_warn("failed to commit memory region\n");
3914 return;
3915 }
3916
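/*
 * sclp.hmfai packs one two-bit field per facility-list word: field i is
 * extracted by (sclp.hmfai << i * 2) >> 30, and the 48-bit mask below is
 * shifted right by 16 bits per unit of that field, limiting how many
 * facility bits of word i are exposed as non-hypervisor facilities.
 */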
3917 static inline unsigned long nonhyp_mask(int i)
3918 {
3919 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3920
3921 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3922 }
3923
3924 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3925 {
3926 vcpu->valid_wakeup = false;
3927 }
3928
3929 static int __init kvm_s390_init(void)
3930 {
3931 int i;
3932
3933 if (!sclp.has_sief2) {
3934 pr_info("SIE not available\n");
3935 return -ENODEV;
3936 }
3937
3938 for (i = 0; i < 16; i++)
3939 kvm_s390_fac_list_mask[i] |=
3940 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3941
3942 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3943 }
3944
3945 static void __exit kvm_s390_exit(void)
3946 {
3947 kvm_exit();
3948 }
3949
3950 module_init(kvm_s390_init);
3951 module_exit(kvm_s390_exit);
3952
3953 /*
3954 * Enable autoloading of the kvm module.
3955 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3956 * since x86 takes a different approach.
3957 */
3958 #include <linux/miscdevice.h>
3959 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3960 MODULE_ALIAS("devname:kvm");