arch/s390/kvm/kvm-s390.c (mirror_ubuntu-bionic-kernel.git)
1 /*
2 * hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008, 2009
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
15 */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include "kvm-s390.h"
47 #include "gaccess.h"
48
49 #define KMSG_COMPONENT "kvm-s390"
50 #undef pr_fmt
51 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
52
53 #define CREATE_TRACE_POINTS
54 #include "trace.h"
55 #include "trace-s390.h"
56
57 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
58 #define LOCAL_IRQS 32
59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
60 (KVM_MAX_VCPUS + LOCAL_IRQS))
61
62 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
63
64 struct kvm_stats_debugfs_item debugfs_entries[] = {
65 { "userspace_handled", VCPU_STAT(exit_userspace) },
66 { "exit_null", VCPU_STAT(exit_null) },
67 { "exit_validity", VCPU_STAT(exit_validity) },
68 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
69 { "exit_external_request", VCPU_STAT(exit_external_request) },
70 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
71 { "exit_instruction", VCPU_STAT(exit_instruction) },
72 { "exit_pei", VCPU_STAT(exit_pei) },
73 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
74 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
75 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
76 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
77 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
78 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
79 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
80 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
82 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
83 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
84 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
85 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
86 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
87 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
88 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
89 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
90 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
91 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
92 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
93 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
94 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
95 { "instruction_spx", VCPU_STAT(instruction_spx) },
96 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
97 { "instruction_stap", VCPU_STAT(instruction_stap) },
98 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
99 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
100 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
101 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
102 { "instruction_essa", VCPU_STAT(instruction_essa) },
103 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
104 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
105 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
106 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
107 { "instruction_sie", VCPU_STAT(instruction_sie) },
108 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
109 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
110 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
111 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
112 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
113 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
114 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
115 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
116 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
117 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
118 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
119 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
120 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
121 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
122 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
123 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
124 { "diagnose_10", VCPU_STAT(diagnose_10) },
125 { "diagnose_44", VCPU_STAT(diagnose_44) },
126 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
127 { "diagnose_258", VCPU_STAT(diagnose_258) },
128 { "diagnose_308", VCPU_STAT(diagnose_308) },
129 { "diagnose_500", VCPU_STAT(diagnose_500) },
130 { NULL }
131 };
132
133 struct kvm_s390_tod_clock_ext {
134 __u8 epoch_idx;
135 __u64 tod;
136 __u8 reserved[7];
137 } __packed;
138
139 /* allow nested virtualization in KVM (if enabled by user space) */
140 static int nested;
141 module_param(nested, int, S_IRUGO);
142 MODULE_PARM_DESC(nested, "Nested virtualization support");
143
144 /* upper facilities limit for kvm */
145 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
146
147 unsigned long kvm_s390_fac_list_mask_size(void)
148 {
149 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
150 return ARRAY_SIZE(kvm_s390_fac_list_mask);
151 }
152
153 /* available cpu features supported by kvm */
154 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
155 /* available subfunctions indicated via query / "test bit" */
156 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
157
158 static struct gmap_notifier gmap_notifier;
159 static struct gmap_notifier vsie_gmap_notifier;
160 debug_info_t *kvm_s390_dbf;
161
162 /* Section: not file related */
163 int kvm_arch_hardware_enable(void)
164 {
165 /* every s390 is virtualization enabled ;-) */
166 return 0;
167 }
168
169 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
170 unsigned long end);
171
172 /*
173 * This callback is executed during stop_machine(). All CPUs are therefore
174 * temporarily stopped. In order not to change guest behavior, we have to
175 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
176 * so a CPU won't be stopped while calculating with the epoch.
177 */
178 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
179 void *v)
180 {
181 struct kvm *kvm;
182 struct kvm_vcpu *vcpu;
183 int i;
184 unsigned long long *delta = v;
185
186 list_for_each_entry(kvm, &vm_list, vm_list) {
187 kvm->arch.epoch -= *delta;
188 kvm_for_each_vcpu(i, vcpu, kvm) {
189 vcpu->arch.sie_block->epoch -= *delta;
190 if (vcpu->arch.cputm_enabled)
191 vcpu->arch.cputm_start += *delta;
192 if (vcpu->arch.vsie_block)
193 vcpu->arch.vsie_block->epoch -= *delta;
194 }
195 }
196 return NOTIFY_OK;
197 }
198
199 static struct notifier_block kvm_clock_notifier = {
200 .notifier_call = kvm_clock_sync,
201 };
202
203 int kvm_arch_hardware_setup(void)
204 {
205 gmap_notifier.notifier_call = kvm_gmap_notifier;
206 gmap_register_pte_notifier(&gmap_notifier);
207 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
208 gmap_register_pte_notifier(&vsie_gmap_notifier);
209 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
210 &kvm_clock_notifier);
211 return 0;
212 }
213
214 void kvm_arch_hardware_unsetup(void)
215 {
216 gmap_unregister_pte_notifier(&gmap_notifier);
217 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
218 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
219 &kvm_clock_notifier);
220 }
221
222 static void allow_cpu_feat(unsigned long nr)
223 {
224 set_bit_inv(nr, kvm_s390_available_cpu_feat);
225 }
226
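/*
 * PERFORM LOCKED OPERATION "test bit" query: with bit 0x100 set in r0,
 * PLO only indicates via the condition code whether the requested
 * function code is installed (cc 0 == available) instead of performing
 * an operation.  kvm_s390_cpu_feat_init() uses this to build the PLO
 * subfunction bitmap that is reported to user space.
 */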
227 static inline int plo_test_bit(unsigned char nr)
228 {
229 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
230 int cc;
231
232 asm volatile(
233 /* Parameter registers are ignored for "test bit" */
234 " plo 0,0,0,0(0)\n"
235 " ipm %0\n"
236 " srl %0,28\n"
237 : "=d" (cc)
238 : "d" (r0)
239 : "cc");
240 return cc == 0;
241 }
242
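/*
 * Probe the host for the CPU features and subfunctions (PLO, PTFF and
 * the CPACF query masks) that KVM can offer to guests.  The SIE related
 * features for nested virtualization are only made available when the
 * required host facilities (SIEF2, ESOP, 64-bit SCAO, IDTE) are present
 * and the "nested" module parameter is set.
 */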
243 static void kvm_s390_cpu_feat_init(void)
244 {
245 int i;
246
247 for (i = 0; i < 256; ++i) {
248 if (plo_test_bit(i))
249 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
250 }
251
252 if (test_facility(28)) /* TOD-clock steering */
253 ptff(kvm_s390_available_subfunc.ptff,
254 sizeof(kvm_s390_available_subfunc.ptff),
255 PTFF_QAF);
256
257 if (test_facility(17)) { /* MSA */
258 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
259 kvm_s390_available_subfunc.kmac);
260 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
261 kvm_s390_available_subfunc.kmc);
262 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
263 kvm_s390_available_subfunc.km);
264 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
265 kvm_s390_available_subfunc.kimd);
266 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
267 kvm_s390_available_subfunc.klmd);
268 }
269 if (test_facility(76)) /* MSA3 */
270 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
271 kvm_s390_available_subfunc.pckmo);
272 if (test_facility(77)) { /* MSA4 */
273 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
274 kvm_s390_available_subfunc.kmctr);
275 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
276 kvm_s390_available_subfunc.kmf);
277 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
278 kvm_s390_available_subfunc.kmo);
279 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
280 kvm_s390_available_subfunc.pcc);
281 }
282 if (test_facility(57)) /* MSA5 */
283 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
284 kvm_s390_available_subfunc.ppno);
285
286 if (test_facility(146)) /* MSA8 */
287 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
288 kvm_s390_available_subfunc.kma);
289
290 if (MACHINE_HAS_ESOP)
291 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
292 /*
293 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
294 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
295 */
296 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
297 !test_facility(3) || !nested)
298 return;
299 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
300 if (sclp.has_64bscao)
301 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
302 if (sclp.has_siif)
303 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
304 if (sclp.has_gpere)
305 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
306 if (sclp.has_gsls)
307 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
308 if (sclp.has_ib)
309 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
310 if (sclp.has_cei)
311 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
312 if (sclp.has_ibs)
313 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
314 if (sclp.has_kss)
315 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
316 /*
317 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
318 * all skey handling functions read/set the skey from the PGSTE
319 * instead of the real storage key.
320 *
321 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
322 * pages being detected as preserved although they are resident.
323 *
324 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
325 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
326 *
327 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
328 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
329 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
330 *
331 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
332 * cannot easily shadow the SCA because of the ipte lock.
333 */
334 }
335
336 int kvm_arch_init(void *opaque)
337 {
338 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
339 if (!kvm_s390_dbf)
340 return -ENOMEM;
341
342 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
343 debug_unregister(kvm_s390_dbf);
344 return -ENOMEM;
345 }
346
347 kvm_s390_cpu_feat_init();
348
349 /* Register floating interrupt controller interface. */
350 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
351 }
352
353 void kvm_arch_exit(void)
354 {
355 debug_unregister(kvm_s390_dbf);
356 }
357
358 /* Section: device related */
359 long kvm_arch_dev_ioctl(struct file *filp,
360 unsigned int ioctl, unsigned long arg)
361 {
362 if (ioctl == KVM_S390_ENABLE_SIE)
363 return s390_enable_sie();
364 return -EINVAL;
365 }
366
367 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
368 {
369 int r;
370
371 switch (ext) {
372 case KVM_CAP_S390_PSW:
373 case KVM_CAP_S390_GMAP:
374 case KVM_CAP_SYNC_MMU:
375 #ifdef CONFIG_KVM_S390_UCONTROL
376 case KVM_CAP_S390_UCONTROL:
377 #endif
378 case KVM_CAP_ASYNC_PF:
379 case KVM_CAP_SYNC_REGS:
380 case KVM_CAP_ONE_REG:
381 case KVM_CAP_ENABLE_CAP:
382 case KVM_CAP_S390_CSS_SUPPORT:
383 case KVM_CAP_IOEVENTFD:
384 case KVM_CAP_DEVICE_CTRL:
385 case KVM_CAP_ENABLE_CAP_VM:
386 case KVM_CAP_S390_IRQCHIP:
387 case KVM_CAP_VM_ATTRIBUTES:
388 case KVM_CAP_MP_STATE:
389 case KVM_CAP_IMMEDIATE_EXIT:
390 case KVM_CAP_S390_INJECT_IRQ:
391 case KVM_CAP_S390_USER_SIGP:
392 case KVM_CAP_S390_USER_STSI:
393 case KVM_CAP_S390_SKEYS:
394 case KVM_CAP_S390_IRQ_STATE:
395 case KVM_CAP_S390_USER_INSTR0:
396 case KVM_CAP_S390_CMMA_MIGRATION:
397 case KVM_CAP_S390_AIS:
398 case KVM_CAP_S390_AIS_MIGRATION:
399 r = 1;
400 break;
401 case KVM_CAP_S390_MEM_OP:
402 r = MEM_OP_MAX_SIZE;
403 break;
404 case KVM_CAP_NR_VCPUS:
405 case KVM_CAP_MAX_VCPUS:
406 r = KVM_S390_BSCA_CPU_SLOTS;
407 if (!kvm_s390_use_sca_entries())
408 r = KVM_MAX_VCPUS;
409 else if (sclp.has_esca && sclp.has_64bscao)
410 r = KVM_S390_ESCA_CPU_SLOTS;
411 break;
412 case KVM_CAP_NR_MEMSLOTS:
413 r = KVM_USER_MEM_SLOTS;
414 break;
415 case KVM_CAP_S390_COW:
416 r = MACHINE_HAS_ESOP;
417 break;
418 case KVM_CAP_S390_VECTOR_REGISTERS:
419 r = MACHINE_HAS_VX;
420 break;
421 case KVM_CAP_S390_RI:
422 r = test_facility(64);
423 break;
424 case KVM_CAP_S390_GS:
425 r = test_facility(133);
426 break;
427 default:
428 r = 0;
429 }
430 return r;
431 }
432
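/*
 * Walk all guest frames of the memslot and transfer the per-page dirty
 * state from the host page tables into the KVM dirty bitmap.  A fatal
 * signal aborts the potentially long walk early.
 */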
433 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
434 struct kvm_memory_slot *memslot)
435 {
436 gfn_t cur_gfn, last_gfn;
437 unsigned long address;
438 struct gmap *gmap = kvm->arch.gmap;
439
440 /* Loop over all guest pages */
441 last_gfn = memslot->base_gfn + memslot->npages;
442 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
443 address = gfn_to_hva_memslot(memslot, cur_gfn);
444
445 if (test_and_clear_guest_dirty(gmap->mm, address))
446 mark_page_dirty(kvm, cur_gfn);
447 if (fatal_signal_pending(current))
448 return;
449 cond_resched();
450 }
451 }
452
453 /* Section: vm related */
454 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
455
456 /*
457 * Get (and clear) the dirty memory log for a memory slot.
458 */
459 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
460 struct kvm_dirty_log *log)
461 {
462 int r;
463 unsigned long n;
464 struct kvm_memslots *slots;
465 struct kvm_memory_slot *memslot;
466 int is_dirty = 0;
467
468 if (kvm_is_ucontrol(kvm))
469 return -EINVAL;
470
471 mutex_lock(&kvm->slots_lock);
472
473 r = -EINVAL;
474 if (log->slot >= KVM_USER_MEM_SLOTS)
475 goto out;
476
477 slots = kvm_memslots(kvm);
478 memslot = id_to_memslot(slots, log->slot);
479 r = -ENOENT;
480 if (!memslot->dirty_bitmap)
481 goto out;
482
483 kvm_s390_sync_dirty_log(kvm, memslot);
484 r = kvm_get_dirty_log(kvm, log, &is_dirty);
485 if (r)
486 goto out;
487
488 /* Clear the dirty log */
489 if (is_dirty) {
490 n = kvm_dirty_bitmap_bytes(memslot);
491 memset(memslot->dirty_bitmap, 0, n);
492 }
493 r = 0;
494 out:
495 mutex_unlock(&kvm->slots_lock);
496 return r;
497 }
498
499 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
500 {
501 unsigned int i;
502 struct kvm_vcpu *vcpu;
503
504 kvm_for_each_vcpu(i, vcpu, kvm) {
505 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
506 }
507 }
508
509 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
510 {
511 int r;
512
513 if (cap->flags)
514 return -EINVAL;
515
516 switch (cap->cap) {
517 case KVM_CAP_S390_IRQCHIP:
518 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
519 kvm->arch.use_irqchip = 1;
520 r = 0;
521 break;
522 case KVM_CAP_S390_USER_SIGP:
523 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
524 kvm->arch.user_sigp = 1;
525 r = 0;
526 break;
527 case KVM_CAP_S390_VECTOR_REGISTERS:
528 mutex_lock(&kvm->lock);
529 if (kvm->created_vcpus) {
530 r = -EBUSY;
531 } else if (MACHINE_HAS_VX) {
532 set_kvm_facility(kvm->arch.model.fac_mask, 129);
533 set_kvm_facility(kvm->arch.model.fac_list, 129);
534 if (test_facility(134)) {
535 set_kvm_facility(kvm->arch.model.fac_mask, 134);
536 set_kvm_facility(kvm->arch.model.fac_list, 134);
537 }
538 if (test_facility(135)) {
539 set_kvm_facility(kvm->arch.model.fac_mask, 135);
540 set_kvm_facility(kvm->arch.model.fac_list, 135);
541 }
542 r = 0;
543 } else
544 r = -EINVAL;
545 mutex_unlock(&kvm->lock);
546 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
547 r ? "(not available)" : "(success)");
548 break;
549 case KVM_CAP_S390_RI:
550 r = -EINVAL;
551 mutex_lock(&kvm->lock);
552 if (kvm->created_vcpus) {
553 r = -EBUSY;
554 } else if (test_facility(64)) {
555 set_kvm_facility(kvm->arch.model.fac_mask, 64);
556 set_kvm_facility(kvm->arch.model.fac_list, 64);
557 r = 0;
558 }
559 mutex_unlock(&kvm->lock);
560 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
561 r ? "(not available)" : "(success)");
562 break;
563 case KVM_CAP_S390_AIS:
564 mutex_lock(&kvm->lock);
565 if (kvm->created_vcpus) {
566 r = -EBUSY;
567 } else {
568 set_kvm_facility(kvm->arch.model.fac_mask, 72);
569 set_kvm_facility(kvm->arch.model.fac_list, 72);
570 r = 0;
571 }
572 mutex_unlock(&kvm->lock);
573 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
574 r ? "(not available)" : "(success)");
575 break;
576 case KVM_CAP_S390_GS:
577 r = -EINVAL;
578 mutex_lock(&kvm->lock);
579 if (atomic_read(&kvm->online_vcpus)) {
580 r = -EBUSY;
581 } else if (test_facility(133)) {
582 set_kvm_facility(kvm->arch.model.fac_mask, 133);
583 set_kvm_facility(kvm->arch.model.fac_list, 133);
584 r = 0;
585 }
586 mutex_unlock(&kvm->lock);
587 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
588 r ? "(not available)" : "(success)");
589 break;
590 case KVM_CAP_S390_USER_STSI:
591 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
592 kvm->arch.user_stsi = 1;
593 r = 0;
594 break;
595 case KVM_CAP_S390_USER_INSTR0:
596 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
597 kvm->arch.user_instr0 = 1;
598 icpt_operexc_on_all_vcpus(kvm);
599 r = 0;
600 break;
601 default:
602 r = -EINVAL;
603 break;
604 }
605 return r;
606 }
607
608 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
609 {
610 int ret;
611
612 switch (attr->attr) {
613 case KVM_S390_VM_MEM_LIMIT_SIZE:
614 ret = 0;
615 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
616 kvm->arch.mem_limit);
617 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
618 ret = -EFAULT;
619 break;
620 default:
621 ret = -ENXIO;
622 break;
623 }
624 return ret;
625 }
626
627 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
628 {
629 int ret;
630 unsigned int idx;
631 switch (attr->attr) {
632 case KVM_S390_VM_MEM_ENABLE_CMMA:
633 ret = -ENXIO;
634 if (!sclp.has_cmma)
635 break;
636
637 ret = -EBUSY;
638 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
639 mutex_lock(&kvm->lock);
640 if (!kvm->created_vcpus) {
641 kvm->arch.use_cmma = 1;
642 ret = 0;
643 }
644 mutex_unlock(&kvm->lock);
645 break;
646 case KVM_S390_VM_MEM_CLR_CMMA:
647 ret = -ENXIO;
648 if (!sclp.has_cmma)
649 break;
650 ret = -EINVAL;
651 if (!kvm->arch.use_cmma)
652 break;
653
654 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
655 mutex_lock(&kvm->lock);
656 idx = srcu_read_lock(&kvm->srcu);
657 s390_reset_cmma(kvm->arch.gmap->mm);
658 srcu_read_unlock(&kvm->srcu, idx);
659 mutex_unlock(&kvm->lock);
660 ret = 0;
661 break;
662 case KVM_S390_VM_MEM_LIMIT_SIZE: {
663 unsigned long new_limit;
664
665 if (kvm_is_ucontrol(kvm))
666 return -EINVAL;
667
668 if (get_user(new_limit, (u64 __user *)attr->addr))
669 return -EFAULT;
670
671 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
672 new_limit > kvm->arch.mem_limit)
673 return -E2BIG;
674
675 if (!new_limit)
676 return -EINVAL;
677
678 /* gmap_create takes last usable address */
679 if (new_limit != KVM_S390_NO_MEM_LIMIT)
680 new_limit -= 1;
681
682 ret = -EBUSY;
683 mutex_lock(&kvm->lock);
684 if (!kvm->created_vcpus) {
685 /* gmap_create will round the limit up */
686 struct gmap *new = gmap_create(current->mm, new_limit);
687
688 if (!new) {
689 ret = -ENOMEM;
690 } else {
691 gmap_remove(kvm->arch.gmap);
692 new->private = kvm;
693 kvm->arch.gmap = new;
694 ret = 0;
695 }
696 }
697 mutex_unlock(&kvm->lock);
698 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
699 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
700 (void *) kvm->arch.gmap->asce);
701 break;
702 }
703 default:
704 ret = -ENXIO;
705 break;
706 }
707 return ret;
708 }
709
710 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
711
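/*
 * Toggle AES/DEA protected-key wrapping.  Enabling generates fresh
 * wrapping key masks in the CRYCB, disabling clears them.  Each vcpu's
 * crypto setup is then refreshed and the vcpu is kicked out of SIE so
 * that the change takes effect.
 */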
712 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
713 {
714 struct kvm_vcpu *vcpu;
715 int i;
716
717 if (!test_kvm_facility(kvm, 76))
718 return -EINVAL;
719
720 mutex_lock(&kvm->lock);
721 switch (attr->attr) {
722 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
723 get_random_bytes(
724 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
725 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
726 kvm->arch.crypto.aes_kw = 1;
727 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
728 break;
729 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
730 get_random_bytes(
731 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
732 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
733 kvm->arch.crypto.dea_kw = 1;
734 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
735 break;
736 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
737 kvm->arch.crypto.aes_kw = 0;
738 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
739 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
740 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
741 break;
742 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
743 kvm->arch.crypto.dea_kw = 0;
744 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
745 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
746 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
747 break;
748 default:
749 mutex_unlock(&kvm->lock);
750 return -ENXIO;
751 }
752
753 kvm_for_each_vcpu(i, vcpu, kvm) {
754 kvm_s390_vcpu_crypto_setup(vcpu);
755 exit_sie(vcpu);
756 }
757 mutex_unlock(&kvm->lock);
758 return 0;
759 }
760
761 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
762 {
763 int cx;
764 struct kvm_vcpu *vcpu;
765
766 kvm_for_each_vcpu(cx, vcpu, kvm)
767 kvm_s390_sync_request(req, vcpu);
768 }
769
770 /*
771 * Must be called with kvm->srcu held to avoid races on memslots, and with
772 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
773 */
774 static int kvm_s390_vm_start_migration(struct kvm *kvm)
775 {
776 struct kvm_s390_migration_state *mgs;
777 struct kvm_memory_slot *ms;
778 /* should be the only one */
779 struct kvm_memslots *slots;
780 unsigned long ram_pages;
781 int slotnr;
782
783 /* migration mode already enabled */
784 if (kvm->arch.migration_state)
785 return 0;
786
787 slots = kvm_memslots(kvm);
788 if (!slots || !slots->used_slots)
789 return -EINVAL;
790
791 mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
792 if (!mgs)
793 return -ENOMEM;
794 kvm->arch.migration_state = mgs;
795
796 if (kvm->arch.use_cmma) {
797 /*
798 * Get the last slot. They should be sorted by base_gfn, so the
799 * last slot is also the one at the end of the address space.
800 * We have verified above that at least one slot is present.
801 */
802 ms = slots->memslots + slots->used_slots - 1;
803 /* round up so we only use full longs */
804 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
805 /* allocate enough bytes to store all the bits */
806 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
807 if (!mgs->pgste_bitmap) {
808 kfree(mgs);
809 kvm->arch.migration_state = NULL;
810 return -ENOMEM;
811 }
812
813 mgs->bitmap_size = ram_pages;
814 atomic64_set(&mgs->dirty_pages, ram_pages);
815 /* mark all the pages in active slots as dirty */
816 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
817 ms = slots->memslots + slotnr;
818 bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
819 }
820
821 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
822 }
823 return 0;
824 }
825
826 /*
827 * Must be called with kvm->lock to avoid races with ourselves and
828 * kvm_s390_vm_start_migration.
829 */
830 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
831 {
832 struct kvm_s390_migration_state *mgs;
833
834 /* migration mode already disabled */
835 if (!kvm->arch.migration_state)
836 return 0;
837 mgs = kvm->arch.migration_state;
838 kvm->arch.migration_state = NULL;
839
840 if (kvm->arch.use_cmma) {
841 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
842 vfree(mgs->pgste_bitmap);
843 }
844 kfree(mgs);
845 return 0;
846 }
847
848 static int kvm_s390_vm_set_migration(struct kvm *kvm,
849 struct kvm_device_attr *attr)
850 {
851 int idx, res = -ENXIO;
852
853 mutex_lock(&kvm->lock);
854 switch (attr->attr) {
855 case KVM_S390_VM_MIGRATION_START:
856 idx = srcu_read_lock(&kvm->srcu);
857 res = kvm_s390_vm_start_migration(kvm);
858 srcu_read_unlock(&kvm->srcu, idx);
859 break;
860 case KVM_S390_VM_MIGRATION_STOP:
861 res = kvm_s390_vm_stop_migration(kvm);
862 break;
863 default:
864 break;
865 }
866 mutex_unlock(&kvm->lock);
867
868 return res;
869 }
870
871 static int kvm_s390_vm_get_migration(struct kvm *kvm,
872 struct kvm_device_attr *attr)
873 {
874 u64 mig = (kvm->arch.migration_state != NULL);
875
876 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
877 return -ENXIO;
878
879 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
880 return -EFAULT;
881 return 0;
882 }
883
884 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
885 {
886 struct kvm_s390_vm_tod_clock gtod;
887
888 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
889 return -EFAULT;
890
891 if (test_kvm_facility(kvm, 139))
892 kvm_s390_set_tod_clock_ext(kvm, &gtod);
893 else if (gtod.epoch_idx == 0)
894 kvm_s390_set_tod_clock(kvm, gtod.tod);
895 else
896 return -EINVAL;
897
898 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
899 gtod.epoch_idx, gtod.tod);
900
901 return 0;
902 }
903
904 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
905 {
906 u8 gtod_high;
907
908 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
909 sizeof(gtod_high)))
910 return -EFAULT;
911
912 if (gtod_high != 0)
913 return -EINVAL;
914 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
915
916 return 0;
917 }
918
919 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
920 {
921 u64 gtod;
922
923 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
924 return -EFAULT;
925
926 kvm_s390_set_tod_clock(kvm, gtod);
927 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
928 return 0;
929 }
930
931 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
932 {
933 int ret;
934
935 if (attr->flags)
936 return -EINVAL;
937
938 switch (attr->attr) {
939 case KVM_S390_VM_TOD_EXT:
940 ret = kvm_s390_set_tod_ext(kvm, attr);
941 break;
942 case KVM_S390_VM_TOD_HIGH:
943 ret = kvm_s390_set_tod_high(kvm, attr);
944 break;
945 case KVM_S390_VM_TOD_LOW:
946 ret = kvm_s390_set_tod_low(kvm, attr);
947 break;
948 default:
949 ret = -ENXIO;
950 break;
951 }
952 return ret;
953 }
954
955 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
956 struct kvm_s390_vm_tod_clock *gtod)
957 {
958 struct kvm_s390_tod_clock_ext htod;
959
960 preempt_disable();
961
962 get_tod_clock_ext((char *)&htod);
963
964 gtod->tod = htod.tod + kvm->arch.epoch;
965 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
966
967 if (gtod->tod < htod.tod)
968 gtod->epoch_idx += 1;
969
970 preempt_enable();
971 }
972
973 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
974 {
975 struct kvm_s390_vm_tod_clock gtod;
976
977 memset(&gtod, 0, sizeof(gtod));
978
979 if (test_kvm_facility(kvm, 139))
980 kvm_s390_get_tod_clock_ext(kvm, &gtod);
981 else
982 gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
983
984 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
985 return -EFAULT;
986
987 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
988 gtod.epoch_idx, gtod.tod);
989 return 0;
990 }
991
992 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
993 {
994 u8 gtod_high = 0;
995
996 if (copy_to_user((void __user *)attr->addr, &gtod_high,
997 sizeof(gtod_high)))
998 return -EFAULT;
999 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1000
1001 return 0;
1002 }
1003
1004 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1005 {
1006 u64 gtod;
1007
1008 gtod = kvm_s390_get_tod_clock_fast(kvm);
1009 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1010 return -EFAULT;
1011 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1012
1013 return 0;
1014 }
1015
1016 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1017 {
1018 int ret;
1019
1020 if (attr->flags)
1021 return -EINVAL;
1022
1023 switch (attr->attr) {
1024 case KVM_S390_VM_TOD_EXT:
1025 ret = kvm_s390_get_tod_ext(kvm, attr);
1026 break;
1027 case KVM_S390_VM_TOD_HIGH:
1028 ret = kvm_s390_get_tod_high(kvm, attr);
1029 break;
1030 case KVM_S390_VM_TOD_LOW:
1031 ret = kvm_s390_get_tod_low(kvm, attr);
1032 break;
1033 default:
1034 ret = -ENXIO;
1035 break;
1036 }
1037 return ret;
1038 }
1039
1040 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1041 {
1042 struct kvm_s390_vm_cpu_processor *proc;
1043 u16 lowest_ibc, unblocked_ibc;
1044 int ret = 0;
1045
1046 mutex_lock(&kvm->lock);
1047 if (kvm->created_vcpus) {
1048 ret = -EBUSY;
1049 goto out;
1050 }
1051 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1052 if (!proc) {
1053 ret = -ENOMEM;
1054 goto out;
1055 }
1056 if (!copy_from_user(proc, (void __user *)attr->addr,
1057 sizeof(*proc))) {
1058 kvm->arch.model.cpuid = proc->cpuid;
1059 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1060 unblocked_ibc = sclp.ibc & 0xfff;
1061 if (lowest_ibc && proc->ibc) {
1062 if (proc->ibc > unblocked_ibc)
1063 kvm->arch.model.ibc = unblocked_ibc;
1064 else if (proc->ibc < lowest_ibc)
1065 kvm->arch.model.ibc = lowest_ibc;
1066 else
1067 kvm->arch.model.ibc = proc->ibc;
1068 }
1069 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1070 S390_ARCH_FAC_LIST_SIZE_BYTE);
1071 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1072 kvm->arch.model.ibc,
1073 kvm->arch.model.cpuid);
1074 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1075 kvm->arch.model.fac_list[0],
1076 kvm->arch.model.fac_list[1],
1077 kvm->arch.model.fac_list[2]);
1078 } else
1079 ret = -EFAULT;
1080 kfree(proc);
1081 out:
1082 mutex_unlock(&kvm->lock);
1083 return ret;
1084 }
1085
1086 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1087 struct kvm_device_attr *attr)
1088 {
1089 struct kvm_s390_vm_cpu_feat data;
1090 int ret = -EBUSY;
1091
1092 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1093 return -EFAULT;
1094 if (!bitmap_subset((unsigned long *) data.feat,
1095 kvm_s390_available_cpu_feat,
1096 KVM_S390_VM_CPU_FEAT_NR_BITS))
1097 return -EINVAL;
1098
1099 mutex_lock(&kvm->lock);
1100 if (!atomic_read(&kvm->online_vcpus)) {
1101 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1102 KVM_S390_VM_CPU_FEAT_NR_BITS);
1103 ret = 0;
1104 }
1105 mutex_unlock(&kvm->lock);
1106 return ret;
1107 }
1108
1109 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1110 struct kvm_device_attr *attr)
1111 {
1112 /*
1113 * Once supported by kernel + hw, we have to store the subfunctions
1114 * in kvm->arch and remember that user space configured them.
1115 */
1116 return -ENXIO;
1117 }
1118
1119 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1120 {
1121 int ret = -ENXIO;
1122
1123 switch (attr->attr) {
1124 case KVM_S390_VM_CPU_PROCESSOR:
1125 ret = kvm_s390_set_processor(kvm, attr);
1126 break;
1127 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1128 ret = kvm_s390_set_processor_feat(kvm, attr);
1129 break;
1130 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1131 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1132 break;
1133 }
1134 return ret;
1135 }
1136
1137 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1138 {
1139 struct kvm_s390_vm_cpu_processor *proc;
1140 int ret = 0;
1141
1142 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1143 if (!proc) {
1144 ret = -ENOMEM;
1145 goto out;
1146 }
1147 proc->cpuid = kvm->arch.model.cpuid;
1148 proc->ibc = kvm->arch.model.ibc;
1149 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1150 S390_ARCH_FAC_LIST_SIZE_BYTE);
1151 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1152 kvm->arch.model.ibc,
1153 kvm->arch.model.cpuid);
1154 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1155 kvm->arch.model.fac_list[0],
1156 kvm->arch.model.fac_list[1],
1157 kvm->arch.model.fac_list[2]);
1158 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1159 ret = -EFAULT;
1160 kfree(proc);
1161 out:
1162 return ret;
1163 }
1164
1165 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1166 {
1167 struct kvm_s390_vm_cpu_machine *mach;
1168 int ret = 0;
1169
1170 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1171 if (!mach) {
1172 ret = -ENOMEM;
1173 goto out;
1174 }
1175 get_cpu_id((struct cpuid *) &mach->cpuid);
1176 mach->ibc = sclp.ibc;
1177 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1178 S390_ARCH_FAC_LIST_SIZE_BYTE);
1179 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1180 sizeof(S390_lowcore.stfle_fac_list));
1181 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1182 kvm->arch.model.ibc,
1183 kvm->arch.model.cpuid);
1184 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1185 mach->fac_mask[0],
1186 mach->fac_mask[1],
1187 mach->fac_mask[2]);
1188 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1189 mach->fac_list[0],
1190 mach->fac_list[1],
1191 mach->fac_list[2]);
1192 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1193 ret = -EFAULT;
1194 kfree(mach);
1195 out:
1196 return ret;
1197 }
1198
1199 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1200 struct kvm_device_attr *attr)
1201 {
1202 struct kvm_s390_vm_cpu_feat data;
1203
1204 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1205 KVM_S390_VM_CPU_FEAT_NR_BITS);
1206 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1207 return -EFAULT;
1208 return 0;
1209 }
1210
1211 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1212 struct kvm_device_attr *attr)
1213 {
1214 struct kvm_s390_vm_cpu_feat data;
1215
1216 bitmap_copy((unsigned long *) data.feat,
1217 kvm_s390_available_cpu_feat,
1218 KVM_S390_VM_CPU_FEAT_NR_BITS);
1219 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1220 return -EFAULT;
1221 return 0;
1222 }
1223
1224 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1225 struct kvm_device_attr *attr)
1226 {
1227 /*
1228 * Once we can actually configure subfunctions (kernel + hw support),
1229 * we have to check if they were already set by user space, if so copy
1230 * them from kvm->arch.
1231 */
1232 return -ENXIO;
1233 }
1234
1235 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1236 struct kvm_device_attr *attr)
1237 {
1238 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1239 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1240 return -EFAULT;
1241 return 0;
1242 }
1243 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1244 {
1245 int ret = -ENXIO;
1246
1247 switch (attr->attr) {
1248 case KVM_S390_VM_CPU_PROCESSOR:
1249 ret = kvm_s390_get_processor(kvm, attr);
1250 break;
1251 case KVM_S390_VM_CPU_MACHINE:
1252 ret = kvm_s390_get_machine(kvm, attr);
1253 break;
1254 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1255 ret = kvm_s390_get_processor_feat(kvm, attr);
1256 break;
1257 case KVM_S390_VM_CPU_MACHINE_FEAT:
1258 ret = kvm_s390_get_machine_feat(kvm, attr);
1259 break;
1260 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1261 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1262 break;
1263 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1264 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1265 break;
1266 }
1267 return ret;
1268 }
1269
1270 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1271 {
1272 int ret;
1273
1274 switch (attr->group) {
1275 case KVM_S390_VM_MEM_CTRL:
1276 ret = kvm_s390_set_mem_control(kvm, attr);
1277 break;
1278 case KVM_S390_VM_TOD:
1279 ret = kvm_s390_set_tod(kvm, attr);
1280 break;
1281 case KVM_S390_VM_CPU_MODEL:
1282 ret = kvm_s390_set_cpu_model(kvm, attr);
1283 break;
1284 case KVM_S390_VM_CRYPTO:
1285 ret = kvm_s390_vm_set_crypto(kvm, attr);
1286 break;
1287 case KVM_S390_VM_MIGRATION:
1288 ret = kvm_s390_vm_set_migration(kvm, attr);
1289 break;
1290 default:
1291 ret = -ENXIO;
1292 break;
1293 }
1294
1295 return ret;
1296 }
1297
1298 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1299 {
1300 int ret;
1301
1302 switch (attr->group) {
1303 case KVM_S390_VM_MEM_CTRL:
1304 ret = kvm_s390_get_mem_control(kvm, attr);
1305 break;
1306 case KVM_S390_VM_TOD:
1307 ret = kvm_s390_get_tod(kvm, attr);
1308 break;
1309 case KVM_S390_VM_CPU_MODEL:
1310 ret = kvm_s390_get_cpu_model(kvm, attr);
1311 break;
1312 case KVM_S390_VM_MIGRATION:
1313 ret = kvm_s390_vm_get_migration(kvm, attr);
1314 break;
1315 default:
1316 ret = -ENXIO;
1317 break;
1318 }
1319
1320 return ret;
1321 }
1322
1323 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1324 {
1325 int ret;
1326
1327 switch (attr->group) {
1328 case KVM_S390_VM_MEM_CTRL:
1329 switch (attr->attr) {
1330 case KVM_S390_VM_MEM_ENABLE_CMMA:
1331 case KVM_S390_VM_MEM_CLR_CMMA:
1332 ret = sclp.has_cmma ? 0 : -ENXIO;
1333 break;
1334 case KVM_S390_VM_MEM_LIMIT_SIZE:
1335 ret = 0;
1336 break;
1337 default:
1338 ret = -ENXIO;
1339 break;
1340 }
1341 break;
1342 case KVM_S390_VM_TOD:
1343 switch (attr->attr) {
1344 case KVM_S390_VM_TOD_LOW:
1345 case KVM_S390_VM_TOD_HIGH:
1346 ret = 0;
1347 break;
1348 default:
1349 ret = -ENXIO;
1350 break;
1351 }
1352 break;
1353 case KVM_S390_VM_CPU_MODEL:
1354 switch (attr->attr) {
1355 case KVM_S390_VM_CPU_PROCESSOR:
1356 case KVM_S390_VM_CPU_MACHINE:
1357 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1358 case KVM_S390_VM_CPU_MACHINE_FEAT:
1359 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1360 ret = 0;
1361 break;
1362 /* configuring subfunctions is not supported yet */
1363 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1364 default:
1365 ret = -ENXIO;
1366 break;
1367 }
1368 break;
1369 case KVM_S390_VM_CRYPTO:
1370 switch (attr->attr) {
1371 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1372 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1373 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1374 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1375 ret = 0;
1376 break;
1377 default:
1378 ret = -ENXIO;
1379 break;
1380 }
1381 break;
1382 case KVM_S390_VM_MIGRATION:
1383 ret = 0;
1384 break;
1385 default:
1386 ret = -ENXIO;
1387 break;
1388 }
1389
1390 return ret;
1391 }
1392
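/*
 * Copy the storage keys of args->count guest frames, starting at
 * args->start_gfn, to the user buffer.  If the guest has never enabled
 * storage key handling, KVM_S390_GET_SKEYS_NONE is returned instead.
 */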
1393 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1394 {
1395 uint8_t *keys;
1396 uint64_t hva;
1397 int srcu_idx, i, r = 0;
1398
1399 if (args->flags != 0)
1400 return -EINVAL;
1401
1402 /* Is this guest using storage keys? */
1403 if (!mm_use_skey(current->mm))
1404 return KVM_S390_GET_SKEYS_NONE;
1405
1406 /* Enforce sane limit on memory allocation */
1407 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1408 return -EINVAL;
1409
1410 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1411 if (!keys)
1412 return -ENOMEM;
1413
1414 down_read(&current->mm->mmap_sem);
1415 srcu_idx = srcu_read_lock(&kvm->srcu);
1416 for (i = 0; i < args->count; i++) {
1417 hva = gfn_to_hva(kvm, args->start_gfn + i);
1418 if (kvm_is_error_hva(hva)) {
1419 r = -EFAULT;
1420 break;
1421 }
1422
1423 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1424 if (r)
1425 break;
1426 }
1427 srcu_read_unlock(&kvm->srcu, srcu_idx);
1428 up_read(&current->mm->mmap_sem);
1429
1430 if (!r) {
1431 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1432 sizeof(uint8_t) * args->count);
1433 if (r)
1434 r = -EFAULT;
1435 }
1436
1437 kvfree(keys);
1438 return r;
1439 }
1440
1441 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1442 {
1443 uint8_t *keys;
1444 uint64_t hva;
1445 int srcu_idx, i, r = 0;
1446
1447 if (args->flags != 0)
1448 return -EINVAL;
1449
1450 /* Enforce sane limit on memory allocation */
1451 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1452 return -EINVAL;
1453
1454 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1455 if (!keys)
1456 return -ENOMEM;
1457
1458 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1459 sizeof(uint8_t) * args->count);
1460 if (r) {
1461 r = -EFAULT;
1462 goto out;
1463 }
1464
1465 /* Enable storage key handling for the guest */
1466 r = s390_enable_skey();
1467 if (r)
1468 goto out;
1469
1470 down_read(&current->mm->mmap_sem);
1471 srcu_idx = srcu_read_lock(&kvm->srcu);
1472 for (i = 0; i < args->count; i++) {
1473 hva = gfn_to_hva(kvm, args->start_gfn + i);
1474 if (kvm_is_error_hva(hva)) {
1475 r = -EFAULT;
1476 break;
1477 }
1478
1479 /* Lowest order bit is reserved */
1480 if (keys[i] & 0x01) {
1481 r = -EINVAL;
1482 break;
1483 }
1484
1485 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1486 if (r)
1487 break;
1488 }
1489 srcu_read_unlock(&kvm->srcu, srcu_idx);
1490 up_read(&current->mm->mmap_sem);
1491 out:
1492 kvfree(keys);
1493 return r;
1494 }
1495
1496 /*
1497 * Base address and length must be sent at the start of each block, therefore
1498 * it's cheaper to send some clean data, as long as it's less than the size of
1499 * two longs.
1500 */
1501 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1502 /* for consistency */
1503 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1504
1505 /*
1506 * This function searches for the next page with dirty CMMA attributes, and
1507 * saves the attributes in the buffer up to either the end of the buffer or
1508 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1509 * no trailing clean bytes are saved.
1510 * In case no dirty bits were found, or if CMMA was not enabled or used, the
1511 * output buffer will indicate 0 as length.
1512 */
1513 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1514 struct kvm_s390_cmma_log *args)
1515 {
1516 struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1517 unsigned long bufsize, hva, pgstev, i, next, cur;
1518 int srcu_idx, peek, r = 0, rr;
1519 u8 *res;
1520
1521 cur = args->start_gfn;
1522 i = next = pgstev = 0;
1523
1524 if (unlikely(!kvm->arch.use_cmma))
1525 return -ENXIO;
1526 /* Invalid/unsupported flags were specified */
1527 if (args->flags & ~KVM_S390_CMMA_PEEK)
1528 return -EINVAL;
1529 /* Migration mode query, and we are not doing a migration */
1530 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1531 if (!peek && !s)
1532 return -EINVAL;
1533 /* CMMA is disabled or was not used, or the buffer has length zero */
1534 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1535 if (!bufsize || !kvm->mm->context.use_cmma) {
1536 memset(args, 0, sizeof(*args));
1537 return 0;
1538 }
1539
1540 if (!peek) {
1541 /* We are not peeking, and there are no dirty pages */
1542 if (!atomic64_read(&s->dirty_pages)) {
1543 memset(args, 0, sizeof(*args));
1544 return 0;
1545 }
1546 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1547 args->start_gfn);
1548 if (cur >= s->bitmap_size) /* nothing found, loop back */
1549 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1550 if (cur >= s->bitmap_size) { /* again! (very unlikely) */
1551 memset(args, 0, sizeof(*args));
1552 return 0;
1553 }
1554 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1555 }
1556
1557 res = vmalloc(bufsize);
1558 if (!res)
1559 return -ENOMEM;
1560
1561 args->start_gfn = cur;
1562
1563 down_read(&kvm->mm->mmap_sem);
1564 srcu_idx = srcu_read_lock(&kvm->srcu);
1565 while (i < bufsize) {
1566 hva = gfn_to_hva(kvm, cur);
1567 if (kvm_is_error_hva(hva)) {
1568 r = -EFAULT;
1569 break;
1570 }
1571 /* decrement only if we actually flipped the bit to 0 */
1572 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1573 atomic64_dec(&s->dirty_pages);
1574 r = get_pgste(kvm->mm, hva, &pgstev);
1575 if (r < 0)
1576 pgstev = 0;
1577 /* save the value */
1578 res[i++] = (pgstev >> 24) & 0x43;
1579 /*
1580 * if the next bit is too far away, stop.
1581 * if we reached the previous "next", find the next one
1582 */
1583 if (!peek) {
1584 if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1585 break;
1586 if (cur == next)
1587 next = find_next_bit(s->pgste_bitmap,
1588 s->bitmap_size, cur + 1);
1589 /* reached the end of the bitmap or of the buffer, stop */
1590 if ((next >= s->bitmap_size) ||
1591 (next >= args->start_gfn + bufsize))
1592 break;
1593 }
1594 cur++;
1595 }
1596 srcu_read_unlock(&kvm->srcu, srcu_idx);
1597 up_read(&kvm->mm->mmap_sem);
1598 args->count = i;
1599 args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1600
1601 rr = copy_to_user((void __user *)args->values, res, args->count);
1602 if (rr)
1603 r = -EFAULT;
1604
1605 vfree(res);
1606 return r;
1607 }
1608
1609 /*
1610 * This function sets the CMMA attributes for the given pages. If the input
1611 * buffer has zero length, no action is taken, otherwise the attributes are
1612 * set and the mm->context.use_cmma flag is set.
1613 */
1614 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1615 const struct kvm_s390_cmma_log *args)
1616 {
1617 unsigned long hva, mask, pgstev, i;
1618 uint8_t *bits;
1619 int srcu_idx, r = 0;
1620
1621 mask = args->mask;
1622
1623 if (!kvm->arch.use_cmma)
1624 return -ENXIO;
1625 /* invalid/unsupported flags */
1626 if (args->flags != 0)
1627 return -EINVAL;
1628 /* Enforce sane limit on memory allocation */
1629 if (args->count > KVM_S390_CMMA_SIZE_MAX)
1630 return -EINVAL;
1631 /* Nothing to do */
1632 if (args->count == 0)
1633 return 0;
1634
1635 bits = vmalloc(sizeof(*bits) * args->count);
1636 if (!bits)
1637 return -ENOMEM;
1638
1639 r = copy_from_user(bits, (void __user *)args->values, args->count);
1640 if (r) {
1641 r = -EFAULT;
1642 goto out;
1643 }
1644
1645 down_read(&kvm->mm->mmap_sem);
1646 srcu_idx = srcu_read_lock(&kvm->srcu);
1647 for (i = 0; i < args->count; i++) {
1648 hva = gfn_to_hva(kvm, args->start_gfn + i);
1649 if (kvm_is_error_hva(hva)) {
1650 r = -EFAULT;
1651 break;
1652 }
1653
1654 pgstev = bits[i];
1655 pgstev = pgstev << 24;
1656 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1657 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1658 }
1659 srcu_read_unlock(&kvm->srcu, srcu_idx);
1660 up_read(&kvm->mm->mmap_sem);
1661
1662 if (!kvm->mm->context.use_cmma) {
1663 down_write(&kvm->mm->mmap_sem);
1664 kvm->mm->context.use_cmma = 1;
1665 up_write(&kvm->mm->mmap_sem);
1666 }
1667 out:
1668 vfree(bits);
1669 return r;
1670 }
1671
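/*
 * Illustrative sketch (not part of the original file) of how user space
 * is expected to drive the CMMA migration interface implemented above:
 * enable migration mode, pull dirty CMMA values in chunks until nothing
 * remains, and replay them on the destination.  vm_fd, buf and buf_size
 * are assumed to be set up by the caller.
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count     = buf_size,
 *		.values    = (__u64) buf,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *	do {
 *		ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *		(transfer log.count values starting at log.start_gfn)
 *		log.start_gfn += log.count;
 *	} while (log.remaining);
 *
 * On the destination, KVM_S390_SET_CMMA_BITS is used with the received
 * start_gfn/count/values and a mask selecting the bits to set.
 */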
1672 long kvm_arch_vm_ioctl(struct file *filp,
1673 unsigned int ioctl, unsigned long arg)
1674 {
1675 struct kvm *kvm = filp->private_data;
1676 void __user *argp = (void __user *)arg;
1677 struct kvm_device_attr attr;
1678 int r;
1679
1680 switch (ioctl) {
1681 case KVM_S390_INTERRUPT: {
1682 struct kvm_s390_interrupt s390int;
1683
1684 r = -EFAULT;
1685 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1686 break;
1687 r = kvm_s390_inject_vm(kvm, &s390int);
1688 break;
1689 }
1690 case KVM_ENABLE_CAP: {
1691 struct kvm_enable_cap cap;
1692 r = -EFAULT;
1693 if (copy_from_user(&cap, argp, sizeof(cap)))
1694 break;
1695 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1696 break;
1697 }
1698 case KVM_CREATE_IRQCHIP: {
1699 struct kvm_irq_routing_entry routing;
1700
1701 r = -EINVAL;
1702 if (kvm->arch.use_irqchip) {
1703 /* Set up dummy routing. */
1704 memset(&routing, 0, sizeof(routing));
1705 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1706 }
1707 break;
1708 }
1709 case KVM_SET_DEVICE_ATTR: {
1710 r = -EFAULT;
1711 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1712 break;
1713 r = kvm_s390_vm_set_attr(kvm, &attr);
1714 break;
1715 }
1716 case KVM_GET_DEVICE_ATTR: {
1717 r = -EFAULT;
1718 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1719 break;
1720 r = kvm_s390_vm_get_attr(kvm, &attr);
1721 break;
1722 }
1723 case KVM_HAS_DEVICE_ATTR: {
1724 r = -EFAULT;
1725 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1726 break;
1727 r = kvm_s390_vm_has_attr(kvm, &attr);
1728 break;
1729 }
1730 case KVM_S390_GET_SKEYS: {
1731 struct kvm_s390_skeys args;
1732
1733 r = -EFAULT;
1734 if (copy_from_user(&args, argp,
1735 sizeof(struct kvm_s390_skeys)))
1736 break;
1737 r = kvm_s390_get_skeys(kvm, &args);
1738 break;
1739 }
1740 case KVM_S390_SET_SKEYS: {
1741 struct kvm_s390_skeys args;
1742
1743 r = -EFAULT;
1744 if (copy_from_user(&args, argp,
1745 sizeof(struct kvm_s390_skeys)))
1746 break;
1747 r = kvm_s390_set_skeys(kvm, &args);
1748 break;
1749 }
1750 case KVM_S390_GET_CMMA_BITS: {
1751 struct kvm_s390_cmma_log args;
1752
1753 r = -EFAULT;
1754 if (copy_from_user(&args, argp, sizeof(args)))
1755 break;
1756 r = kvm_s390_get_cmma_bits(kvm, &args);
1757 if (!r) {
1758 r = copy_to_user(argp, &args, sizeof(args));
1759 if (r)
1760 r = -EFAULT;
1761 }
1762 break;
1763 }
1764 case KVM_S390_SET_CMMA_BITS: {
1765 struct kvm_s390_cmma_log args;
1766
1767 r = -EFAULT;
1768 if (copy_from_user(&args, argp, sizeof(args)))
1769 break;
1770 r = kvm_s390_set_cmma_bits(kvm, &args);
1771 break;
1772 }
1773 default:
1774 r = -ENOTTY;
1775 }
1776
1777 return r;
1778 }
1779
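/*
 * Query the AP configuration with PQAP(QCI): function code 0x04 in
 * register 0, a 128 byte result buffer addressed by register 2.  The
 * exception table entry lets the kernel survive a possible program
 * check (e.g. no AP instructions installed); the zeroed buffer then
 * simply reports no capabilities.
 */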
1780 static int kvm_s390_query_ap_config(u8 *config)
1781 {
1782 u32 fcn_code = 0x04000000UL;
1783 u32 cc = 0;
1784
1785 memset(config, 0, 128);
1786 asm volatile(
1787 "lgr 0,%1\n"
1788 "lgr 2,%2\n"
1789 ".long 0xb2af0000\n" /* PQAP(QCI) */
1790 "0: ipm %0\n"
1791 "srl %0,28\n"
1792 "1:\n"
1793 EX_TABLE(0b, 1b)
1794 : "+r" (cc)
1795 : "r" (fcn_code), "r" (config)
1796 : "cc", "0", "2", "memory"
1797 );
1798
1799 return cc;
1800 }
1801
1802 static int kvm_s390_apxa_installed(void)
1803 {
1804 u8 config[128];
1805 int cc;
1806
1807 if (test_facility(12)) {
1808 cc = kvm_s390_query_ap_config(config);
1809
1810 if (cc)
1811 pr_err("PQAP(QCI) failed with cc=%d", cc);
1812 else
1813 return config[0] & 0x40;
1814 }
1815
1816 return 0;
1817 }
1818
1819 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1820 {
1821 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1822
1823 if (kvm_s390_apxa_installed())
1824 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1825 else
1826 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1827 }
1828
1829 static u64 kvm_s390_get_initial_cpuid(void)
1830 {
1831 struct cpuid cpuid;
1832
1833 get_cpu_id(&cpuid);
1834 cpuid.version = 0xff;
1835 return *((u64 *) &cpuid);
1836 }
1837
1838 static void kvm_s390_crypto_init(struct kvm *kvm)
1839 {
1840 if (!test_kvm_facility(kvm, 76))
1841 return;
1842
1843 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1844 kvm_s390_set_crycb_format(kvm);
1845
1846 /* Enable AES/DEA protected key functions by default */
1847 kvm->arch.crypto.aes_kw = 1;
1848 kvm->arch.crypto.dea_kw = 1;
1849 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1850 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1851 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1852 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1853 }
1854
1855 static void sca_dispose(struct kvm *kvm)
1856 {
1857 if (kvm->arch.use_esca)
1858 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1859 else
1860 free_page((unsigned long)(kvm->arch.sca));
1861 kvm->arch.sca = NULL;
1862 }
1863
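/*
 * Create the architecture specific parts of a VM: the basic SCA (placed
 * at a staggered offset within its page), the debug feature, the
 * facility mask and list, the crypto control block, the floating
 * interrupt state and, for non-ucontrol guests, the gmap that limits
 * guest memory to kvm->arch.mem_limit.
 */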
1864 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1865 {
1866 gfp_t alloc_flags = GFP_KERNEL;
1867 int i, rc;
1868 char debug_name[16];
1869 static unsigned long sca_offset;
1870
1871 rc = -EINVAL;
1872 #ifdef CONFIG_KVM_S390_UCONTROL
1873 if (type & ~KVM_VM_S390_UCONTROL)
1874 goto out_err;
1875 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1876 goto out_err;
1877 #else
1878 if (type)
1879 goto out_err;
1880 #endif
1881
1882 rc = s390_enable_sie();
1883 if (rc)
1884 goto out_err;
1885
1886 rc = -ENOMEM;
1887
1888 kvm->arch.use_esca = 0; /* start with basic SCA */
1889 if (!sclp.has_64bscao)
1890 alloc_flags |= GFP_DMA;
1891 rwlock_init(&kvm->arch.sca_lock);
1892 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1893 if (!kvm->arch.sca)
1894 goto out_err;
1895 spin_lock(&kvm_lock);
1896 sca_offset += 16;
1897 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1898 sca_offset = 0;
1899 kvm->arch.sca = (struct bsca_block *)
1900 ((char *) kvm->arch.sca + sca_offset);
1901 spin_unlock(&kvm_lock);
1902
1903 sprintf(debug_name, "kvm-%u", current->pid);
1904
1905 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1906 if (!kvm->arch.dbf)
1907 goto out_err;
1908
1909 kvm->arch.sie_page2 =
1910 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1911 if (!kvm->arch.sie_page2)
1912 goto out_err;
1913
1914 /* Populate the facility mask initially. */
1915 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1916 sizeof(S390_lowcore.stfle_fac_list));
1917 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1918 if (i < kvm_s390_fac_list_mask_size())
1919 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1920 else
1921 kvm->arch.model.fac_mask[i] = 0UL;
1922 }
1923
1924 /* Populate the facility list initially. */
1925 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1926 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1927 S390_ARCH_FAC_LIST_SIZE_BYTE);
1928
1929 /* we are always in czam mode - even on pre z14 machines */
1930 set_kvm_facility(kvm->arch.model.fac_mask, 138);
1931 set_kvm_facility(kvm->arch.model.fac_list, 138);
1932 /* we emulate STHYI in kvm */
1933 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1934 set_kvm_facility(kvm->arch.model.fac_list, 74);
1935 if (MACHINE_HAS_TLB_GUEST) {
1936 set_kvm_facility(kvm->arch.model.fac_mask, 147);
1937 set_kvm_facility(kvm->arch.model.fac_list, 147);
1938 }
1939
1940 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1941 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1942
1943 kvm_s390_crypto_init(kvm);
1944
1945 mutex_init(&kvm->arch.float_int.ais_lock);
1946 kvm->arch.float_int.simm = 0;
1947 kvm->arch.float_int.nimm = 0;
1948 spin_lock_init(&kvm->arch.float_int.lock);
1949 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1950 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1951 init_waitqueue_head(&kvm->arch.ipte_wq);
1952 mutex_init(&kvm->arch.ipte_mutex);
1953
1954 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1955 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1956
1957 if (type & KVM_VM_S390_UCONTROL) {
1958 kvm->arch.gmap = NULL;
1959 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1960 } else {
1961 if (sclp.hamax == U64_MAX)
1962 kvm->arch.mem_limit = TASK_SIZE_MAX;
1963 else
1964 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1965 sclp.hamax + 1);
1966 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1967 if (!kvm->arch.gmap)
1968 goto out_err;
1969 kvm->arch.gmap->private = kvm;
1970 kvm->arch.gmap->pfault_enabled = 0;
1971 }
1972
1973 kvm->arch.css_support = 0;
1974 kvm->arch.use_irqchip = 0;
1975 kvm->arch.epoch = 0;
1976
1977 spin_lock_init(&kvm->arch.start_stop_lock);
1978 kvm_s390_vsie_init(kvm);
1979 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1980
1981 return 0;
1982 out_err:
1983 free_page((unsigned long)kvm->arch.sie_page2);
1984 debug_unregister(kvm->arch.dbf);
1985 sca_dispose(kvm);
1986 KVM_EVENT(3, "creation of vm failed: %d", rc);
1987 return rc;
1988 }
1989
1990 bool kvm_arch_has_vcpu_debugfs(void)
1991 {
1992 return false;
1993 }
1994
1995 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1996 {
1997 return 0;
1998 }
1999
2000 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2001 {
2002 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2003 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2004 kvm_s390_clear_local_irqs(vcpu);
2005 kvm_clear_async_pf_completion_queue(vcpu);
2006 if (!kvm_is_ucontrol(vcpu->kvm))
2007 sca_del_vcpu(vcpu);
2008
2009 if (kvm_is_ucontrol(vcpu->kvm))
2010 gmap_remove(vcpu->arch.gmap);
2011
2012 if (vcpu->kvm->arch.use_cmma)
2013 kvm_s390_vcpu_unsetup_cmma(vcpu);
2014 free_page((unsigned long)(vcpu->arch.sie_block));
2015
2016 kvm_vcpu_uninit(vcpu);
2017 kmem_cache_free(kvm_vcpu_cache, vcpu);
2018 }
2019
2020 static void kvm_free_vcpus(struct kvm *kvm)
2021 {
2022 unsigned int i;
2023 struct kvm_vcpu *vcpu;
2024
2025 kvm_for_each_vcpu(i, vcpu, kvm)
2026 kvm_arch_vcpu_destroy(vcpu);
2027
2028 mutex_lock(&kvm->lock);
2029 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2030 kvm->vcpus[i] = NULL;
2031
2032 atomic_set(&kvm->online_vcpus, 0);
2033 mutex_unlock(&kvm->lock);
2034 }
2035
2036 void kvm_arch_destroy_vm(struct kvm *kvm)
2037 {
2038 kvm_free_vcpus(kvm);
2039 sca_dispose(kvm);
2040 debug_unregister(kvm->arch.dbf);
2041 free_page((unsigned long)kvm->arch.sie_page2);
2042 if (!kvm_is_ucontrol(kvm))
2043 gmap_remove(kvm->arch.gmap);
2044 kvm_s390_destroy_adapters(kvm);
2045 kvm_s390_clear_float_irqs(kvm);
2046 kvm_s390_vsie_destroy(kvm);
2047 if (kvm->arch.migration_state) {
2048 vfree(kvm->arch.migration_state->pgste_bitmap);
2049 kfree(kvm->arch.migration_state);
2050 }
2051 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2052 }
2053
2054 /* Section: vcpu related */
2055 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2056 {
2057 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2058 if (!vcpu->arch.gmap)
2059 return -ENOMEM;
2060 vcpu->arch.gmap->private = vcpu->kvm;
2061
2062 return 0;
2063 }
2064
2065 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2066 {
2067 if (!kvm_s390_use_sca_entries())
2068 return;
2069 read_lock(&vcpu->kvm->arch.sca_lock);
2070 if (vcpu->kvm->arch.use_esca) {
2071 struct esca_block *sca = vcpu->kvm->arch.sca;
2072
2073 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2074 sca->cpu[vcpu->vcpu_id].sda = 0;
2075 } else {
2076 struct bsca_block *sca = vcpu->kvm->arch.sca;
2077
2078 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2079 sca->cpu[vcpu->vcpu_id].sda = 0;
2080 }
2081 read_unlock(&vcpu->kvm->arch.sca_lock);
2082 }
2083
2084 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2085 {
2086 if (!kvm_s390_use_sca_entries()) {
2087 struct bsca_block *sca = vcpu->kvm->arch.sca;
2088
2089 /* we still need the basic sca for the ipte control */
2090 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2091 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2092 }
2093 read_lock(&vcpu->kvm->arch.sca_lock);
2094 if (vcpu->kvm->arch.use_esca) {
2095 struct esca_block *sca = vcpu->kvm->arch.sca;
2096
2097 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2098 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2099 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2100 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2101 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2102 } else {
2103 struct bsca_block *sca = vcpu->kvm->arch.sca;
2104
2105 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2106 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2107 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2108 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2109 }
2110 read_unlock(&vcpu->kvm->arch.sca_lock);
2111 }
2112
2113 /* Basic SCA to Extended SCA data copy routines */
2114 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2115 {
2116 d->sda = s->sda;
2117 d->sigp_ctrl.c = s->sigp_ctrl.c;
2118 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2119 }
2120
2121 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2122 {
2123 int i;
2124
2125 d->ipte_control = s->ipte_control;
2126 d->mcn[0] = s->mcn;
2127 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2128 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2129 }
2130
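/*
 * Replace the basic SCA with an extended SCA: allocate a zeroed ESCA,
 * block all VCPUs, copy the existing entries over, point every SIE block
 * at the new origin and set ECB2_ESCA, then free the old basic SCA page.
 */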
2131 static int sca_switch_to_extended(struct kvm *kvm)
2132 {
2133 struct bsca_block *old_sca = kvm->arch.sca;
2134 struct esca_block *new_sca;
2135 struct kvm_vcpu *vcpu;
2136 unsigned int vcpu_idx;
2137 u32 scaol, scaoh;
2138
2139 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2140 if (!new_sca)
2141 return -ENOMEM;
2142
2143 scaoh = (u32)((u64)(new_sca) >> 32);
2144 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2145
2146 kvm_s390_vcpu_block_all(kvm);
2147 write_lock(&kvm->arch.sca_lock);
2148
2149 sca_copy_b_to_e(new_sca, old_sca);
2150
2151 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2152 vcpu->arch.sie_block->scaoh = scaoh;
2153 vcpu->arch.sie_block->scaol = scaol;
2154 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2155 }
2156 kvm->arch.sca = new_sca;
2157 kvm->arch.use_esca = 1;
2158
2159 write_unlock(&kvm->arch.sca_lock);
2160 kvm_s390_vcpu_unblock_all(kvm);
2161
2162 free_page((unsigned long)old_sca);
2163
2164 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2165 old_sca, kvm->arch.sca);
2166 return 0;
2167 }
2168
2169 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2170 {
2171 int rc;
2172
2173 if (!kvm_s390_use_sca_entries()) {
2174 if (id < KVM_MAX_VCPUS)
2175 return true;
2176 return false;
2177 }
2178 if (id < KVM_S390_BSCA_CPU_SLOTS)
2179 return true;
2180 if (!sclp.has_esca || !sclp.has_64bscao)
2181 return false;
2182
2183 mutex_lock(&kvm->lock);
2184 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2185 mutex_unlock(&kvm->lock);
2186
2187 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2188 }
2189
2190 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2191 {
2192 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2193 kvm_clear_async_pf_completion_queue(vcpu);
2194 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2195 KVM_SYNC_GPRS |
2196 KVM_SYNC_ACRS |
2197 KVM_SYNC_CRS |
2198 KVM_SYNC_ARCH0 |
2199 KVM_SYNC_PFAULT;
2200 kvm_s390_set_prefix(vcpu, 0);
2201 if (test_kvm_facility(vcpu->kvm, 64))
2202 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2203 if (test_kvm_facility(vcpu->kvm, 133))
2204 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2205 /* fprs can be synchronized via vrs, even if the guest has no vx. With
2206 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2207 */
2208 if (MACHINE_HAS_VX)
2209 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2210 else
2211 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2212
2213 if (kvm_is_ucontrol(vcpu->kvm))
2214 return __kvm_ucontrol_vcpu_init(vcpu);
2215
2216 return 0;
2217 }
2218
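/*
 * CPU timer accounting: while accounting is enabled, cputm_start holds the
 * host TOD value at which the guest CPU timer last started running.
 * Readers (kvm_s390_get_cpu_timer) subtract the elapsed delta under
 * cputm_seqcount to get a consistent value even from other threads.
 */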
2219 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2220 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2221 {
2222 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2223 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2224 vcpu->arch.cputm_start = get_tod_clock_fast();
2225 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2226 }
2227
2228 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2229 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2230 {
2231 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2232 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2233 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2234 vcpu->arch.cputm_start = 0;
2235 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2236 }
2237
2238 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2239 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2240 {
2241 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2242 vcpu->arch.cputm_enabled = true;
2243 __start_cpu_timer_accounting(vcpu);
2244 }
2245
2246 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2247 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2248 {
2249 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2250 __stop_cpu_timer_accounting(vcpu);
2251 vcpu->arch.cputm_enabled = false;
2252 }
2253
2254 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2255 {
2256 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2257 __enable_cpu_timer_accounting(vcpu);
2258 preempt_enable();
2259 }
2260
2261 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2262 {
2263 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2264 __disable_cpu_timer_accounting(vcpu);
2265 preempt_enable();
2266 }
2267
2268 /* set the cpu timer - may only be called from the VCPU thread itself */
2269 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2270 {
2271 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2272 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2273 if (vcpu->arch.cputm_enabled)
2274 vcpu->arch.cputm_start = get_tod_clock_fast();
2275 vcpu->arch.sie_block->cputm = cputm;
2276 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2277 preempt_enable();
2278 }
2279
2280 /* update and get the cpu timer - can also be called from other VCPU threads */
2281 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2282 {
2283 unsigned int seq;
2284 __u64 value;
2285
2286 if (unlikely(!vcpu->arch.cputm_enabled))
2287 return vcpu->arch.sie_block->cputm;
2288
2289 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2290 do {
2291 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2292 /*
2293 * If the writer would ever execute a read in the critical
2294 * section, e.g. in irq context, we have a deadlock.
2295 */
2296 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2297 value = vcpu->arch.sie_block->cputm;
2298 /* if cputm_start is 0, accounting is being started/stopped */
2299 if (likely(vcpu->arch.cputm_start))
2300 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2301 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2302 preempt_enable();
2303 return value;
2304 }
2305
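/*
 * vcpu_load/vcpu_put: (re)enable or disable the gmap that was active for
 * this VCPU, toggle CPUSTAT_RUNNING and start/stop CPU timer accounting so
 * that it only runs while the VCPU is scheduled in and not idle.
 */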
2306 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2307 {
2308
2309 gmap_enable(vcpu->arch.enabled_gmap);
2310 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2311 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2312 __start_cpu_timer_accounting(vcpu);
2313 vcpu->cpu = cpu;
2314 }
2315
2316 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2317 {
2318 vcpu->cpu = -1;
2319 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2320 __stop_cpu_timer_accounting(vcpu);
2321 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2322 vcpu->arch.enabled_gmap = gmap_get_enabled();
2323 gmap_disable(vcpu->arch.enabled_gmap);
2324
2325 }
2326
2327 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2328 {
2329 /* this equals initial cpu reset in pop, but we don't switch to ESA */
2330 vcpu->arch.sie_block->gpsw.mask = 0UL;
2331 vcpu->arch.sie_block->gpsw.addr = 0UL;
2332 kvm_s390_set_prefix(vcpu, 0);
2333 kvm_s390_set_cpu_timer(vcpu, 0);
2334 vcpu->arch.sie_block->ckc = 0UL;
2335 vcpu->arch.sie_block->todpr = 0;
2336 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2337 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
2338 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2339 /* make sure the new fpc will be lazily loaded */
2340 save_fpu_regs();
2341 current->thread.fpu.fpc = 0;
2342 vcpu->arch.sie_block->gbea = 1;
2343 vcpu->arch.sie_block->pp = 0;
2344 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2345 kvm_clear_async_pf_completion_queue(vcpu);
2346 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2347 kvm_s390_vcpu_stop(vcpu);
2348 kvm_s390_clear_local_irqs(vcpu);
2349 }
2350
2351 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2352 {
2353 mutex_lock(&vcpu->kvm->lock);
2354 preempt_disable();
2355 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2356 preempt_enable();
2357 mutex_unlock(&vcpu->kvm->lock);
2358 if (!kvm_is_ucontrol(vcpu->kvm)) {
2359 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2360 sca_add_vcpu(vcpu);
2361 }
2362 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2363 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2364 /* make vcpu_load load the right gmap on the first trigger */
2365 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2366 }
2367
2368 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2369 {
2370 if (!test_kvm_facility(vcpu->kvm, 76))
2371 return;
2372
2373 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2374
2375 if (vcpu->kvm->arch.crypto.aes_kw)
2376 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2377 if (vcpu->kvm->arch.crypto.dea_kw)
2378 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2379
2380 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2381 }
2382
2383 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2384 {
2385 free_page(vcpu->arch.sie_block->cbrlo);
2386 vcpu->arch.sie_block->cbrlo = 0;
2387 }
2388
2389 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2390 {
2391 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2392 if (!vcpu->arch.sie_block->cbrlo)
2393 return -ENOMEM;
2394
2395 vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2396 return 0;
2397 }
2398
2399 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2400 {
2401 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2402
2403 vcpu->arch.sie_block->ibc = model->ibc;
2404 if (test_kvm_facility(vcpu->kvm, 7))
2405 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2406 }
2407
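/*
 * Wire up the SIE control block for this VCPU: set the initial CPU state
 * flags, apply the CPU model (ibc and facility list), and enable the
 * ECB/ECA/ECD interpretation bits depending on the tested facilities and
 * sclp capabilities. Also sets up CMMA if in use, the clock comparator
 * wakeup timer and the crypto control block.
 */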
2408 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2409 {
2410 int rc = 0;
2411
2412 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2413 CPUSTAT_SM |
2414 CPUSTAT_STOPPED);
2415
2416 if (test_kvm_facility(vcpu->kvm, 78))
2417 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2418 else if (test_kvm_facility(vcpu->kvm, 8))
2419 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2420
2421 kvm_s390_vcpu_setup_model(vcpu);
2422
2423 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2424 if (MACHINE_HAS_ESOP)
2425 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2426 if (test_kvm_facility(vcpu->kvm, 9))
2427 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2428 if (test_kvm_facility(vcpu->kvm, 73))
2429 vcpu->arch.sie_block->ecb |= ECB_TE;
2430
2431 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2432 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2433 if (test_kvm_facility(vcpu->kvm, 130))
2434 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2435 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2436 if (sclp.has_cei)
2437 vcpu->arch.sie_block->eca |= ECA_CEI;
2438 if (sclp.has_ib)
2439 vcpu->arch.sie_block->eca |= ECA_IB;
2440 if (sclp.has_siif)
2441 vcpu->arch.sie_block->eca |= ECA_SII;
2442 if (sclp.has_sigpif)
2443 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2444 if (test_kvm_facility(vcpu->kvm, 129)) {
2445 vcpu->arch.sie_block->eca |= ECA_VX;
2446 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2447 }
2448 if (test_kvm_facility(vcpu->kvm, 139))
2449 vcpu->arch.sie_block->ecd |= ECD_MEF;
2450
2451 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2452 | SDNXC;
2453 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2454
2455 if (sclp.has_kss)
2456 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2457 else
2458 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2459
2460 if (vcpu->kvm->arch.use_cmma) {
2461 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2462 if (rc)
2463 return rc;
2464 }
2465 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2466 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2467
2468 kvm_s390_vcpu_crypto_setup(vcpu);
2469
2470 return rc;
2471 }
2472
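/*
 * Allocate and initialize a new VCPU: check that it still fits into the
 * (possibly extended) SCA, allocate the vcpu structure and its sie_page
 * (SIE control block plus itdb), set mso/msl, initialize the local
 * interrupt state and the CPU timer seqcount, then register the VCPU
 * with common KVM code.
 */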
2473 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2474 unsigned int id)
2475 {
2476 struct kvm_vcpu *vcpu;
2477 struct sie_page *sie_page;
2478 int rc = -EINVAL;
2479
2480 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2481 goto out;
2482
2483 rc = -ENOMEM;
2484
2485 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2486 if (!vcpu)
2487 goto out;
2488
2489 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2490 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2491 if (!sie_page)
2492 goto out_free_cpu;
2493
2494 vcpu->arch.sie_block = &sie_page->sie_block;
2495 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2496
2497 /* the real guest size will always be smaller than msl */
2498 vcpu->arch.sie_block->mso = 0;
2499 vcpu->arch.sie_block->msl = sclp.hamax;
2500
2501 vcpu->arch.sie_block->icpua = id;
2502 spin_lock_init(&vcpu->arch.local_int.lock);
2503 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2504 vcpu->arch.local_int.wq = &vcpu->wq;
2505 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2506 seqcount_init(&vcpu->arch.cputm_seqcount);
2507
2508 rc = kvm_vcpu_init(vcpu, kvm, id);
2509 if (rc)
2510 goto out_free_sie_block;
2511 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2512 vcpu->arch.sie_block);
2513 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2514
2515 return vcpu;
2516 out_free_sie_block:
2517 free_page((unsigned long)(vcpu->arch.sie_block));
2518 out_free_cpu:
2519 kmem_cache_free(kvm_vcpu_cache, vcpu);
2520 out:
2521 return ERR_PTR(rc);
2522 }
2523
2524 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2525 {
2526 return kvm_s390_vcpu_has_irq(vcpu, 0);
2527 }
2528
2529 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2530 {
2531 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2532 }
2533
2534 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2535 {
2536 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2537 exit_sie(vcpu);
2538 }
2539
2540 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2541 {
2542 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2543 }
2544
2545 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2546 {
2547 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2548 exit_sie(vcpu);
2549 }
2550
2551 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2552 {
2553 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2554 }
2555
2556 /*
2557 * Kick a guest cpu out of SIE and wait until SIE is not running.
2558 * If the CPU is not running (e.g. waiting as idle) the function will
2559 * return immediately. */
2560 void exit_sie(struct kvm_vcpu *vcpu)
2561 {
2562 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2563 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2564 cpu_relax();
2565 }
2566
2567 /* Kick a guest cpu out of SIE to process a request synchronously */
2568 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2569 {
2570 kvm_make_request(req, vcpu);
2571 kvm_s390_vcpu_request(vcpu);
2572 }
2573
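/*
 * gmap invalidation notifier: only prefix pages (which live below 2 GB)
 * are of interest here. Any VCPU whose two-page prefix area overlaps the
 * invalidated range gets a KVM_REQ_MMU_RELOAD so that the ipte notifier
 * is re-armed in kvm_s390_handle_requests().
 */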
2574 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2575 unsigned long end)
2576 {
2577 struct kvm *kvm = gmap->private;
2578 struct kvm_vcpu *vcpu;
2579 unsigned long prefix;
2580 int i;
2581
2582 if (gmap_is_shadow(gmap))
2583 return;
2584 if (start >= 1UL << 31)
2585 /* We are only interested in prefix pages */
2586 return;
2587 kvm_for_each_vcpu(i, vcpu, kvm) {
2588 /* match against both prefix pages */
2589 prefix = kvm_s390_get_prefix(vcpu);
2590 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2591 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2592 start, end);
2593 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2594 }
2595 }
2596 }
2597
2598 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2599 {
2600 /* kvm common code refers to this, but never calls it */
2601 BUG();
2602 return 0;
2603 }
2604
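/*
 * KVM_GET_ONE_REG / KVM_SET_ONE_REG accessors for the s390 specific
 * registers kept in the SIE block and vcpu->arch: TOD programmable
 * register, epoch difference, CPU timer, clock comparator, the pfault
 * parameters, PP and GBEA.
 */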
2605 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2606 struct kvm_one_reg *reg)
2607 {
2608 int r = -EINVAL;
2609
2610 switch (reg->id) {
2611 case KVM_REG_S390_TODPR:
2612 r = put_user(vcpu->arch.sie_block->todpr,
2613 (u32 __user *)reg->addr);
2614 break;
2615 case KVM_REG_S390_EPOCHDIFF:
2616 r = put_user(vcpu->arch.sie_block->epoch,
2617 (u64 __user *)reg->addr);
2618 break;
2619 case KVM_REG_S390_CPU_TIMER:
2620 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2621 (u64 __user *)reg->addr);
2622 break;
2623 case KVM_REG_S390_CLOCK_COMP:
2624 r = put_user(vcpu->arch.sie_block->ckc,
2625 (u64 __user *)reg->addr);
2626 break;
2627 case KVM_REG_S390_PFTOKEN:
2628 r = put_user(vcpu->arch.pfault_token,
2629 (u64 __user *)reg->addr);
2630 break;
2631 case KVM_REG_S390_PFCOMPARE:
2632 r = put_user(vcpu->arch.pfault_compare,
2633 (u64 __user *)reg->addr);
2634 break;
2635 case KVM_REG_S390_PFSELECT:
2636 r = put_user(vcpu->arch.pfault_select,
2637 (u64 __user *)reg->addr);
2638 break;
2639 case KVM_REG_S390_PP:
2640 r = put_user(vcpu->arch.sie_block->pp,
2641 (u64 __user *)reg->addr);
2642 break;
2643 case KVM_REG_S390_GBEA:
2644 r = put_user(vcpu->arch.sie_block->gbea,
2645 (u64 __user *)reg->addr);
2646 break;
2647 default:
2648 break;
2649 }
2650
2651 return r;
2652 }
2653
2654 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2655 struct kvm_one_reg *reg)
2656 {
2657 int r = -EINVAL;
2658 __u64 val;
2659
2660 switch (reg->id) {
2661 case KVM_REG_S390_TODPR:
2662 r = get_user(vcpu->arch.sie_block->todpr,
2663 (u32 __user *)reg->addr);
2664 break;
2665 case KVM_REG_S390_EPOCHDIFF:
2666 r = get_user(vcpu->arch.sie_block->epoch,
2667 (u64 __user *)reg->addr);
2668 break;
2669 case KVM_REG_S390_CPU_TIMER:
2670 r = get_user(val, (u64 __user *)reg->addr);
2671 if (!r)
2672 kvm_s390_set_cpu_timer(vcpu, val);
2673 break;
2674 case KVM_REG_S390_CLOCK_COMP:
2675 r = get_user(vcpu->arch.sie_block->ckc,
2676 (u64 __user *)reg->addr);
2677 break;
2678 case KVM_REG_S390_PFTOKEN:
2679 r = get_user(vcpu->arch.pfault_token,
2680 (u64 __user *)reg->addr);
2681 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2682 kvm_clear_async_pf_completion_queue(vcpu);
2683 break;
2684 case KVM_REG_S390_PFCOMPARE:
2685 r = get_user(vcpu->arch.pfault_compare,
2686 (u64 __user *)reg->addr);
2687 break;
2688 case KVM_REG_S390_PFSELECT:
2689 r = get_user(vcpu->arch.pfault_select,
2690 (u64 __user *)reg->addr);
2691 break;
2692 case KVM_REG_S390_PP:
2693 r = get_user(vcpu->arch.sie_block->pp,
2694 (u64 __user *)reg->addr);
2695 break;
2696 case KVM_REG_S390_GBEA:
2697 r = get_user(vcpu->arch.sie_block->gbea,
2698 (u64 __user *)reg->addr);
2699 break;
2700 default:
2701 break;
2702 }
2703
2704 return r;
2705 }
2706
2707 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2708 {
2709 kvm_s390_vcpu_initial_reset(vcpu);
2710 return 0;
2711 }
2712
2713 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2714 {
2715 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2716 return 0;
2717 }
2718
2719 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2720 {
2721 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2722 return 0;
2723 }
2724
2725 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2726 struct kvm_sregs *sregs)
2727 {
2728 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2729 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2730 return 0;
2731 }
2732
2733 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2734 struct kvm_sregs *sregs)
2735 {
2736 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2737 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2738 return 0;
2739 }
2740
2741 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2742 {
2743 if (test_fp_ctl(fpu->fpc))
2744 return -EINVAL;
2745 vcpu->run->s.regs.fpc = fpu->fpc;
2746 if (MACHINE_HAS_VX)
2747 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2748 (freg_t *) fpu->fprs);
2749 else
2750 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2751 return 0;
2752 }
2753
2754 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2755 {
2756 /* make sure we have the latest values */
2757 save_fpu_regs();
2758 if (MACHINE_HAS_VX)
2759 convert_vx_to_fp((freg_t *) fpu->fprs,
2760 (__vector128 *) vcpu->run->s.regs.vrs);
2761 else
2762 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2763 fpu->fpc = vcpu->run->s.regs.fpc;
2764 return 0;
2765 }
2766
2767 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2768 {
2769 int rc = 0;
2770
2771 if (!is_vcpu_stopped(vcpu))
2772 rc = -EBUSY;
2773 else {
2774 vcpu->run->psw_mask = psw.mask;
2775 vcpu->run->psw_addr = psw.addr;
2776 }
2777 return rc;
2778 }
2779
2780 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2781 struct kvm_translation *tr)
2782 {
2783 return -EINVAL; /* not implemented yet */
2784 }
2785
2786 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2787 KVM_GUESTDBG_USE_HW_BP | \
2788 KVM_GUESTDBG_ENABLE)
2789
2790 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2791 struct kvm_guest_debug *dbg)
2792 {
2793 int rc = 0;
2794
2795 vcpu->guest_debug = 0;
2796 kvm_s390_clear_bp_data(vcpu);
2797
2798 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2799 return -EINVAL;
2800 if (!sclp.has_gpere)
2801 return -EINVAL;
2802
2803 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2804 vcpu->guest_debug = dbg->control;
2805 /* enforce guest PER */
2806 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2807
2808 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2809 rc = kvm_s390_import_bp_data(vcpu, dbg);
2810 } else {
2811 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2812 vcpu->arch.guestdbg.last_bp = 0;
2813 }
2814
2815 if (rc) {
2816 vcpu->guest_debug = 0;
2817 kvm_s390_clear_bp_data(vcpu);
2818 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2819 }
2820
2821 return rc;
2822 }
2823
2824 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2825 struct kvm_mp_state *mp_state)
2826 {
2827 /* CHECK_STOP and LOAD are not supported yet */
2828 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2829 KVM_MP_STATE_OPERATING;
2830 }
2831
2832 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2833 struct kvm_mp_state *mp_state)
2834 {
2835 int rc = 0;
2836
2837 /* user space knows about this interface - let it control the state */
2838 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2839
2840 switch (mp_state->mp_state) {
2841 case KVM_MP_STATE_STOPPED:
2842 kvm_s390_vcpu_stop(vcpu);
2843 break;
2844 case KVM_MP_STATE_OPERATING:
2845 kvm_s390_vcpu_start(vcpu);
2846 break;
2847 case KVM_MP_STATE_LOAD:
2848 case KVM_MP_STATE_CHECK_STOP:
2849 /* fall through - CHECK_STOP and LOAD are not supported yet */
2850 default:
2851 rc = -ENXIO;
2852 }
2853
2854 return rc;
2855 }
2856
2857 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2858 {
2859 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2860 }
2861
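/*
 * Process requests that were posted while this VCPU was (about to be)
 * outside SIE: re-protect the prefix pages for MMU_RELOAD, flush the
 * guest TLB, toggle the IBS CPU state, force interception of operation
 * exceptions, and disable/re-enable CMMA interpretation around migration.
 */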
2862 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2863 {
2864 retry:
2865 kvm_s390_vcpu_request_handled(vcpu);
2866 if (!kvm_request_pending(vcpu))
2867 return 0;
2868 /*
2869 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2870 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2871 * This ensures that the ipte instruction for this request has
2872 * already finished. We might race against a second unmapper that
2873 * wants to set the blocking bit. Lets just retry the request loop.
2874 */
2875 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2876 int rc;
2877 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2878 kvm_s390_get_prefix(vcpu),
2879 PAGE_SIZE * 2, PROT_WRITE);
2880 if (rc) {
2881 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2882 return rc;
2883 }
2884 goto retry;
2885 }
2886
2887 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2888 vcpu->arch.sie_block->ihcpu = 0xffff;
2889 goto retry;
2890 }
2891
2892 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2893 if (!ibs_enabled(vcpu)) {
2894 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2895 atomic_or(CPUSTAT_IBS,
2896 &vcpu->arch.sie_block->cpuflags);
2897 }
2898 goto retry;
2899 }
2900
2901 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2902 if (ibs_enabled(vcpu)) {
2903 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2904 atomic_andnot(CPUSTAT_IBS,
2905 &vcpu->arch.sie_block->cpuflags);
2906 }
2907 goto retry;
2908 }
2909
2910 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2911 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2912 goto retry;
2913 }
2914
2915 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2916 /*
2917 * Disable CMMA virtualization; we will emulate the ESSA
2918 * instruction manually, in order to provide additional
2919 * functionalities needed for live migration.
2920 */
2921 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2922 goto retry;
2923 }
2924
2925 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2926 /*
2927 * Re-enable CMMA virtualization if CMMA is available and
2928 * was used.
2929 */
2930 if ((vcpu->kvm->arch.use_cmma) &&
2931 (vcpu->kvm->mm->context.use_cmma))
2932 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2933 goto retry;
2934 }
2935
2936 /* nothing to do, just clear the request */
2937 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2938
2939 return 0;
2940 }
2941
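/*
 * Set the guest TOD clock (multiple-epoch aware variant): the per-VM epoch
 * is the difference between the requested guest TOD and the current host
 * TOD; the epoch index is carried down by one if that subtraction wraps.
 * All VCPUs are blocked while their SIE blocks are updated.
 */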
2942 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2943 const struct kvm_s390_vm_tod_clock *gtod)
2944 {
2945 struct kvm_vcpu *vcpu;
2946 struct kvm_s390_tod_clock_ext htod;
2947 int i;
2948
2949 mutex_lock(&kvm->lock);
2950 preempt_disable();
2951
2952 get_tod_clock_ext((char *)&htod);
2953
2954 kvm->arch.epoch = gtod->tod - htod.tod;
2955 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2956
2957 if (kvm->arch.epoch > gtod->tod)
2958 kvm->arch.epdx -= 1;
2959
2960 kvm_s390_vcpu_block_all(kvm);
2961 kvm_for_each_vcpu(i, vcpu, kvm) {
2962 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2963 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
2964 }
2965
2966 kvm_s390_vcpu_unblock_all(kvm);
2967 preempt_enable();
2968 mutex_unlock(&kvm->lock);
2969 }
2970
2971 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2972 {
2973 struct kvm_vcpu *vcpu;
2974 int i;
2975
2976 mutex_lock(&kvm->lock);
2977 preempt_disable();
2978 kvm->arch.epoch = tod - get_tod_clock();
2979 kvm_s390_vcpu_block_all(kvm);
2980 kvm_for_each_vcpu(i, vcpu, kvm)
2981 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2982 kvm_s390_vcpu_unblock_all(kvm);
2983 preempt_enable();
2984 mutex_unlock(&kvm->lock);
2985 }
2986
2987 /**
2988 * kvm_arch_fault_in_page - fault-in guest page if necessary
2989 * @vcpu: The corresponding virtual cpu
2990 * @gpa: Guest physical address
2991 * @writable: Whether the page should be writable or not
2992 *
2993 * Make sure that a guest page has been faulted-in on the host.
2994 *
2995 * Return: Zero on success, negative error code otherwise.
2996 */
2997 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2998 {
2999 return gmap_fault(vcpu->arch.gmap, gpa,
3000 writable ? FAULT_FLAG_WRITE : 0);
3001 }
3002
3003 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3004 unsigned long token)
3005 {
3006 struct kvm_s390_interrupt inti;
3007 struct kvm_s390_irq irq;
3008
3009 if (start_token) {
3010 irq.u.ext.ext_params2 = token;
3011 irq.type = KVM_S390_INT_PFAULT_INIT;
3012 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3013 } else {
3014 inti.type = KVM_S390_INT_PFAULT_DONE;
3015 inti.parm64 = token;
3016 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3017 }
3018 }
3019
3020 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3021 struct kvm_async_pf *work)
3022 {
3023 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3024 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3025 }
3026
3027 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3028 struct kvm_async_pf *work)
3029 {
3030 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3031 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3032 }
3033
3034 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3035 struct kvm_async_pf *work)
3036 {
3037 /* s390 will always inject the page directly */
3038 }
3039
3040 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3041 {
3042 /*
3043 * s390 will always inject the page directly,
3044 * but we still want check_async_completion to clean up
3045 */
3046 return true;
3047 }
3048
3049 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3050 {
3051 hva_t hva;
3052 struct kvm_arch_async_pf arch;
3053 int rc;
3054
3055 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3056 return 0;
3057 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3058 vcpu->arch.pfault_compare)
3059 return 0;
3060 if (psw_extint_disabled(vcpu))
3061 return 0;
3062 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3063 return 0;
3064 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3065 return 0;
3066 if (!vcpu->arch.gmap->pfault_enabled)
3067 return 0;
3068
3069 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3070 hva += current->thread.gmap_addr & ~PAGE_MASK;
3071 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3072 return 0;
3073
3074 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3075 return rc;
3076 }
3077
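/*
 * Work done before (re)entering SIE: handle completed async pfaults, stash
 * gprs 14/15 in the SIE block, give the scheduler and the machine check
 * handler a chance to run, deliver pending interrupts, process pending
 * requests and patch in the PER configuration when guest debugging is
 * active.
 */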
3078 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3079 {
3080 int rc, cpuflags;
3081
3082 /*
3083 * On s390 notifications for arriving pages will be delivered directly
3084 * to the guest but the housekeeping for completed pfaults is
3085 * handled outside the worker.
3086 */
3087 kvm_check_async_pf_completion(vcpu);
3088
3089 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3090 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3091
3092 if (need_resched())
3093 schedule();
3094
3095 if (test_cpu_flag(CIF_MCCK_PENDING))
3096 s390_handle_mcck();
3097
3098 if (!kvm_is_ucontrol(vcpu->kvm)) {
3099 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3100 if (rc)
3101 return rc;
3102 }
3103
3104 rc = kvm_s390_handle_requests(vcpu);
3105 if (rc)
3106 return rc;
3107
3108 if (guestdbg_enabled(vcpu)) {
3109 kvm_s390_backup_guest_per_regs(vcpu);
3110 kvm_s390_patch_guest_per_regs(vcpu);
3111 }
3112
3113 vcpu->arch.sie_block->icptcode = 0;
3114 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3115 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3116 trace_kvm_s390_sie_enter(vcpu, cpuflags);
3117
3118 return 0;
3119 }
3120
3121 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3122 {
3123 struct kvm_s390_pgm_info pgm_info = {
3124 .code = PGM_ADDRESSING,
3125 };
3126 u8 opcode, ilen;
3127 int rc;
3128
3129 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3130 trace_kvm_s390_sie_fault(vcpu);
3131
3132 /*
3133 * We want to inject an addressing exception, which is defined as a
3134 * suppressing or terminating exception. However, since we came here
3135 * by a DAT access exception, the PSW still points to the faulting
3136 * instruction since DAT exceptions are nullifying. So we've got
3137 * to look up the current opcode to get the length of the instruction
3138 * to be able to forward the PSW.
3139 */
3140 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3141 ilen = insn_length(opcode);
3142 if (rc < 0) {
3143 return rc;
3144 } else if (rc) {
3145 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3146 * Forward by arbitrary ilc, injection will take care of
3147 * nullification if necessary.
3148 */
3149 pgm_info = vcpu->arch.pgm;
3150 ilen = 4;
3151 }
3152 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3153 kvm_s390_forward_psw(vcpu, ilen);
3154 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3155 }
3156
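/*
 * Translate the outcome of a SIE round: -EINTR means a machine check
 * interrupted SIE and the stored mcck info is reinjected into the guest,
 * a non-zero intercept code is either handled in the kernel or forwarded
 * to userspace as KVM_EXIT_S390_SIEIC, ucontrol faults become
 * KVM_EXIT_S390_UCONTROL, and guest page faults are turned into async
 * pfaults or faulted in synchronously.
 */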
3157 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3158 {
3159 struct mcck_volatile_info *mcck_info;
3160 struct sie_page *sie_page;
3161
3162 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3163 vcpu->arch.sie_block->icptcode);
3164 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3165
3166 if (guestdbg_enabled(vcpu))
3167 kvm_s390_restore_guest_per_regs(vcpu);
3168
3169 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3170 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3171
3172 if (exit_reason == -EINTR) {
3173 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3174 sie_page = container_of(vcpu->arch.sie_block,
3175 struct sie_page, sie_block);
3176 mcck_info = &sie_page->mcck_info;
3177 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3178 return 0;
3179 }
3180
3181 if (vcpu->arch.sie_block->icptcode > 0) {
3182 int rc = kvm_handle_sie_intercept(vcpu);
3183
3184 if (rc != -EOPNOTSUPP)
3185 return rc;
3186 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3187 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3188 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3189 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3190 return -EREMOTE;
3191 } else if (exit_reason != -EFAULT) {
3192 vcpu->stat.exit_null++;
3193 return 0;
3194 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3195 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3196 vcpu->run->s390_ucontrol.trans_exc_code =
3197 current->thread.gmap_addr;
3198 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3199 return -EREMOTE;
3200 } else if (current->thread.gmap_pfault) {
3201 trace_kvm_s390_major_guest_pfault(vcpu);
3202 current->thread.gmap_pfault = 0;
3203 if (kvm_arch_setup_async_pf(vcpu))
3204 return 0;
3205 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3206 }
3207 return vcpu_post_run_fault_in_sie(vcpu);
3208 }
3209
3210 static int __vcpu_run(struct kvm_vcpu *vcpu)
3211 {
3212 int rc, exit_reason;
3213
3214 /*
3215 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3216 * ning the guest), so that memslots (and other stuff) are protected
3217 */
3218 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3219
3220 do {
3221 rc = vcpu_pre_run(vcpu);
3222 if (rc)
3223 break;
3224
3225 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3226 /*
3227 * As PF_VCPU will be used in the fault handler, there should be
3228 * no uaccess between guest_enter and guest_exit.
3229 */
3230 local_irq_disable();
3231 guest_enter_irqoff();
3232 __disable_cpu_timer_accounting(vcpu);
3233 local_irq_enable();
3234 exit_reason = sie64a(vcpu->arch.sie_block,
3235 vcpu->run->s.regs.gprs);
3236 local_irq_disable();
3237 __enable_cpu_timer_accounting(vcpu);
3238 guest_exit_irqoff();
3239 local_irq_enable();
3240 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3241
3242 rc = vcpu_post_run(vcpu, exit_reason);
3243 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3244
3245 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3246 return rc;
3247 }
3248
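/*
 * Copy register state that userspace marked dirty from kvm_run into the
 * VCPU/SIE block (prefix, control registers with a TLB flush, timers,
 * pfault parameters), enable runtime instrumentation or guarded storage
 * when userspace hands in valid control blocks, and swap the host FPU,
 * access and guarded-storage registers for the guest ones.
 */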
3249 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3250 {
3251 struct runtime_instr_cb *riccb;
3252 struct gs_cb *gscb;
3253
3254 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3255 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3256 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3257 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3258 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3259 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3260 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3261 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3262 /* some control register changes require a tlb flush */
3263 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3264 }
3265 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3266 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3267 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3268 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3269 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3270 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3271 }
3272 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3273 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3274 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3275 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3276 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3277 kvm_clear_async_pf_completion_queue(vcpu);
3278 }
3279 /*
3280 * If userspace sets the riccb (e.g. after migration) to a valid state,
3281 * we should enable RI here instead of doing the lazy enablement.
3282 */
3283 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3284 test_kvm_facility(vcpu->kvm, 64) &&
3285 riccb->v &&
3286 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3287 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3288 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3289 }
3290 /*
3291 * If userspace sets the gscb (e.g. after migration) to non-zero,
3292 * we should enable GS here instead of doing the lazy enablement.
3293 */
3294 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3295 test_kvm_facility(vcpu->kvm, 133) &&
3296 gscb->gssm &&
3297 !vcpu->arch.gs_enabled) {
3298 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3299 vcpu->arch.sie_block->ecb |= ECB_GS;
3300 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3301 vcpu->arch.gs_enabled = 1;
3302 }
3303 save_access_regs(vcpu->arch.host_acrs);
3304 restore_access_regs(vcpu->run->s.regs.acrs);
3305 /* save host (userspace) fprs/vrs */
3306 save_fpu_regs();
3307 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3308 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3309 if (MACHINE_HAS_VX)
3310 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3311 else
3312 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3313 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3314 if (test_fp_ctl(current->thread.fpu.fpc))
3315 /* User space provided an invalid FPC, let's clear it */
3316 current->thread.fpu.fpc = 0;
3317 if (MACHINE_HAS_GS) {
3318 preempt_disable();
3319 __ctl_set_bit(2, 4);
3320 if (current->thread.gs_cb) {
3321 vcpu->arch.host_gscb = current->thread.gs_cb;
3322 save_gs_cb(vcpu->arch.host_gscb);
3323 }
3324 if (vcpu->arch.gs_enabled) {
3325 current->thread.gs_cb = (struct gs_cb *)
3326 &vcpu->run->s.regs.gscb;
3327 restore_gs_cb(current->thread.gs_cb);
3328 }
3329 preempt_enable();
3330 }
3331
3332 kvm_run->kvm_dirty_regs = 0;
3333 }
3334
3335 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3336 {
3337 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3338 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3339 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3340 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3341 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3342 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3343 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3344 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3345 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3346 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3347 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3348 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3349 save_access_regs(vcpu->run->s.regs.acrs);
3350 restore_access_regs(vcpu->arch.host_acrs);
3351 /* Save guest register state */
3352 save_fpu_regs();
3353 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3354 /* Restore will be done lazily at return */
3355 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3356 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3357 if (MACHINE_HAS_GS) {
3358 __ctl_set_bit(2, 4);
3359 if (vcpu->arch.gs_enabled)
3360 save_gs_cb(current->thread.gs_cb);
3361 preempt_disable();
3362 current->thread.gs_cb = vcpu->arch.host_gscb;
3363 restore_gs_cb(vcpu->arch.host_gscb);
3364 preempt_enable();
3365 if (!vcpu->arch.host_gscb)
3366 __ctl_clear_bit(2, 4);
3367 vcpu->arch.host_gscb = NULL;
3368 }
3369
3370 }
3371
3372 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3373 {
3374 int rc;
3375
3376 if (kvm_run->immediate_exit)
3377 return -EINTR;
3378
3379 if (guestdbg_exit_pending(vcpu)) {
3380 kvm_s390_prepare_debug_exit(vcpu);
3381 return 0;
3382 }
3383
3384 kvm_sigset_activate(vcpu);
3385
3386 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3387 kvm_s390_vcpu_start(vcpu);
3388 } else if (is_vcpu_stopped(vcpu)) {
3389 pr_err_ratelimited("can't run stopped vcpu %d\n",
3390 vcpu->vcpu_id);
3391 return -EINVAL;
3392 }
3393
3394 sync_regs(vcpu, kvm_run);
3395 enable_cpu_timer_accounting(vcpu);
3396
3397 might_fault();
3398 rc = __vcpu_run(vcpu);
3399
3400 if (signal_pending(current) && !rc) {
3401 kvm_run->exit_reason = KVM_EXIT_INTR;
3402 rc = -EINTR;
3403 }
3404
3405 if (guestdbg_exit_pending(vcpu) && !rc) {
3406 kvm_s390_prepare_debug_exit(vcpu);
3407 rc = 0;
3408 }
3409
3410 if (rc == -EREMOTE) {
3411 /* userspace support is needed, kvm_run has been prepared */
3412 rc = 0;
3413 }
3414
3415 disable_cpu_timer_accounting(vcpu);
3416 store_regs(vcpu, kvm_run);
3417
3418 kvm_sigset_deactivate(vcpu);
3419
3420 vcpu->stat.exit_userspace++;
3421 return rc;
3422 }
3423
3424 /*
3425 * store status at address
3426 * we have two special cases:
3427 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3428 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3429 */
3430 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3431 {
3432 unsigned char archmode = 1;
3433 freg_t fprs[NUM_FPRS];
3434 unsigned int px;
3435 u64 clkcomp, cputm;
3436 int rc;
3437
3438 px = kvm_s390_get_prefix(vcpu);
3439 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3440 if (write_guest_abs(vcpu, 163, &archmode, 1))
3441 return -EFAULT;
3442 gpa = 0;
3443 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3444 if (write_guest_real(vcpu, 163, &archmode, 1))
3445 return -EFAULT;
3446 gpa = px;
3447 } else
3448 gpa -= __LC_FPREGS_SAVE_AREA;
3449
3450 /* manually convert vector registers if necessary */
3451 if (MACHINE_HAS_VX) {
3452 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3453 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3454 fprs, 128);
3455 } else {
3456 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3457 vcpu->run->s.regs.fprs, 128);
3458 }
3459 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3460 vcpu->run->s.regs.gprs, 128);
3461 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3462 &vcpu->arch.sie_block->gpsw, 16);
3463 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3464 &px, 4);
3465 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3466 &vcpu->run->s.regs.fpc, 4);
3467 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3468 &vcpu->arch.sie_block->todpr, 4);
3469 cputm = kvm_s390_get_cpu_timer(vcpu);
3470 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3471 &cputm, 8);
3472 clkcomp = vcpu->arch.sie_block->ckc >> 8;
3473 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3474 &clkcomp, 8);
3475 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3476 &vcpu->run->s.regs.acrs, 64);
3477 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3478 &vcpu->arch.sie_block->gcr, 128);
3479 return rc ? -EFAULT : 0;
3480 }
3481
3482 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3483 {
3484 /*
3485 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3486 * switch in the run ioctl. Let's update our copies before we save
3487 * them into the save area
3488 */
3489 save_fpu_regs();
3490 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3491 save_access_regs(vcpu->run->s.regs.acrs);
3492
3493 return kvm_s390_store_status_unloaded(vcpu, addr);
3494 }
3495
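/*
 * IBS enable/disable helpers: the change is delivered as a synchronous
 * request so the target VCPU drops out of SIE and applies it in
 * kvm_s390_handle_requests(); a still pending request for the opposite
 * state is cancelled first.
 */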
3496 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3497 {
3498 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3499 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3500 }
3501
3502 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3503 {
3504 unsigned int i;
3505 struct kvm_vcpu *vcpu;
3506
3507 kvm_for_each_vcpu(i, vcpu, kvm) {
3508 __disable_ibs_on_vcpu(vcpu);
3509 }
3510 }
3511
3512 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3513 {
3514 if (!sclp.has_ibs)
3515 return;
3516 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3517 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3518 }
3519
3520 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3521 {
3522 int i, online_vcpus, started_vcpus = 0;
3523
3524 if (!is_vcpu_stopped(vcpu))
3525 return;
3526
3527 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3528 /* Only one cpu at a time may enter/leave the STOPPED state. */
3529 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3530 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3531
3532 for (i = 0; i < online_vcpus; i++) {
3533 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3534 started_vcpus++;
3535 }
3536
3537 if (started_vcpus == 0) {
3538 /* we're the only active VCPU -> speed it up */
3539 __enable_ibs_on_vcpu(vcpu);
3540 } else if (started_vcpus == 1) {
3541 /*
3542 * As we are starting a second VCPU, we have to disable
3543 * the IBS facility on all VCPUs to remove potentially
3544 * outstanding ENABLE requests.
3545 */
3546 __disable_ibs_on_all_vcpus(vcpu->kvm);
3547 }
3548
3549 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3550 /*
3551 * Another VCPU might have used IBS while we were offline.
3552 * Let's play safe and flush the VCPU at startup.
3553 */
3554 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3555 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3556 return;
3557 }
3558
3559 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3560 {
3561 int i, online_vcpus, started_vcpus = 0;
3562 struct kvm_vcpu *started_vcpu = NULL;
3563
3564 if (is_vcpu_stopped(vcpu))
3565 return;
3566
3567 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3568 /* Only one cpu at a time may enter/leave the STOPPED state. */
3569 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3570 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3571
3572 /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3573 kvm_s390_clear_stop_irq(vcpu);
3574
3575 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3576 __disable_ibs_on_vcpu(vcpu);
3577
3578 for (i = 0; i < online_vcpus; i++) {
3579 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3580 started_vcpus++;
3581 started_vcpu = vcpu->kvm->vcpus[i];
3582 }
3583 }
3584
3585 if (started_vcpus == 1) {
3586 /*
3587 * As we only have one VCPU left, we want to enable the
3588 * IBS facility for that VCPU to speed it up.
3589 */
3590 __enable_ibs_on_vcpu(started_vcpu);
3591 }
3592
3593 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3594 return;
3595 }
3596
3597 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3598 struct kvm_enable_cap *cap)
3599 {
3600 int r;
3601
3602 if (cap->flags)
3603 return -EINVAL;
3604
3605 switch (cap->cap) {
3606 case KVM_CAP_S390_CSS_SUPPORT:
3607 if (!vcpu->kvm->arch.css_support) {
3608 vcpu->kvm->arch.css_support = 1;
3609 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3610 trace_kvm_s390_enable_css(vcpu->kvm);
3611 }
3612 r = 0;
3613 break;
3614 default:
3615 r = -EINVAL;
3616 break;
3617 }
3618 return r;
3619 }
3620
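/*
 * Handle the KVM_S390_MEM_OP vcpu ioctl: validate flags and size, and for
 * a plain check (KVM_S390_MEMOP_F_CHECK_ONLY) just verify that the guest
 * range is accessible. Otherwise bounce the data through a temporary
 * buffer with read_guest()/write_guest(); on access errors the resulting
 * program exception can be injected if the caller asked for it.
 */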
3621 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3622 struct kvm_s390_mem_op *mop)
3623 {
3624 void __user *uaddr = (void __user *)mop->buf;
3625 void *tmpbuf = NULL;
3626 int r, srcu_idx;
3627 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3628 | KVM_S390_MEMOP_F_CHECK_ONLY;
3629
3630 if (mop->flags & ~supported_flags)
3631 return -EINVAL;
3632
3633 if (mop->size > MEM_OP_MAX_SIZE)
3634 return -E2BIG;
3635
3636 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3637 tmpbuf = vmalloc(mop->size);
3638 if (!tmpbuf)
3639 return -ENOMEM;
3640 }
3641
3642 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3643
3644 switch (mop->op) {
3645 case KVM_S390_MEMOP_LOGICAL_READ:
3646 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3647 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3648 mop->size, GACC_FETCH);
3649 break;
3650 }
3651 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3652 if (r == 0) {
3653 if (copy_to_user(uaddr, tmpbuf, mop->size))
3654 r = -EFAULT;
3655 }
3656 break;
3657 case KVM_S390_MEMOP_LOGICAL_WRITE:
3658 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3659 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3660 mop->size, GACC_STORE);
3661 break;
3662 }
3663 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3664 r = -EFAULT;
3665 break;
3666 }
3667 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3668 break;
3669 default:
3670 r = -EINVAL;
3671 }
3672
3673 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3674
3675 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3676 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3677
3678 vfree(tmpbuf);
3679 return r;
3680 }
3681
3682 long kvm_arch_vcpu_ioctl(struct file *filp,
3683 unsigned int ioctl, unsigned long arg)
3684 {
3685 struct kvm_vcpu *vcpu = filp->private_data;
3686 void __user *argp = (void __user *)arg;
3687 int idx;
3688 long r;
3689
3690 switch (ioctl) {
3691 case KVM_S390_IRQ: {
3692 struct kvm_s390_irq s390irq;
3693
3694 r = -EFAULT;
3695 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3696 break;
3697 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3698 break;
3699 }
3700 case KVM_S390_INTERRUPT: {
3701 struct kvm_s390_interrupt s390int;
3702 struct kvm_s390_irq s390irq;
3703
3704 r = -EFAULT;
3705 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3706 break;
3707 if (s390int_to_s390irq(&s390int, &s390irq))
3708 return -EINVAL;
3709 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3710 break;
3711 }
3712 case KVM_S390_STORE_STATUS:
3713 idx = srcu_read_lock(&vcpu->kvm->srcu);
3714 r = kvm_s390_vcpu_store_status(vcpu, arg);
3715 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3716 break;
3717 case KVM_S390_SET_INITIAL_PSW: {
3718 psw_t psw;
3719
3720 r = -EFAULT;
3721 if (copy_from_user(&psw, argp, sizeof(psw)))
3722 break;
3723 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3724 break;
3725 }
3726 case KVM_S390_INITIAL_RESET:
3727 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3728 break;
3729 case KVM_SET_ONE_REG:
3730 case KVM_GET_ONE_REG: {
3731 struct kvm_one_reg reg;
3732 r = -EFAULT;
3733 if (copy_from_user(&reg, argp, sizeof(reg)))
3734 break;
3735 if (ioctl == KVM_SET_ONE_REG)
3736 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3737 else
3738 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3739 break;
3740 }
3741 #ifdef CONFIG_KVM_S390_UCONTROL
3742 case KVM_S390_UCAS_MAP: {
3743 struct kvm_s390_ucas_mapping ucasmap;
3744
3745 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3746 r = -EFAULT;
3747 break;
3748 }
3749
3750 if (!kvm_is_ucontrol(vcpu->kvm)) {
3751 r = -EINVAL;
3752 break;
3753 }
3754
3755 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3756 ucasmap.vcpu_addr, ucasmap.length);
3757 break;
3758 }
3759 case KVM_S390_UCAS_UNMAP: {
3760 struct kvm_s390_ucas_mapping ucasmap;
3761
3762 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3763 r = -EFAULT;
3764 break;
3765 }
3766
3767 if (!kvm_is_ucontrol(vcpu->kvm)) {
3768 r = -EINVAL;
3769 break;
3770 }
3771
3772 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3773 ucasmap.length);
3774 break;
3775 }
3776 #endif
3777 case KVM_S390_VCPU_FAULT: {
3778 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3779 break;
3780 }
3781 case KVM_ENABLE_CAP:
3782 {
3783 struct kvm_enable_cap cap;
3784 r = -EFAULT;
3785 if (copy_from_user(&cap, argp, sizeof(cap)))
3786 break;
3787 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3788 break;
3789 }
3790 case KVM_S390_MEM_OP: {
3791 struct kvm_s390_mem_op mem_op;
3792
3793 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3794 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3795 else
3796 r = -EFAULT;
3797 break;
3798 }
3799 case KVM_S390_SET_IRQ_STATE: {
3800 struct kvm_s390_irq_state irq_state;
3801
3802 r = -EFAULT;
3803 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3804 break;
3805 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3806 irq_state.len == 0 ||
3807 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3808 r = -EINVAL;
3809 break;
3810 }
3811 r = kvm_s390_set_irq_state(vcpu,
3812 (void __user *) irq_state.buf,
3813 irq_state.len);
3814 break;
3815 }
3816 case KVM_S390_GET_IRQ_STATE: {
3817 struct kvm_s390_irq_state irq_state;
3818
3819 r = -EFAULT;
3820 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3821 break;
3822 if (irq_state.len == 0) {
3823 r = -EINVAL;
3824 break;
3825 }
3826 r = kvm_s390_get_irq_state(vcpu,
3827 (__u8 __user *) irq_state.buf,
3828 irq_state.len);
3829 break;
3830 }
3831 default:
3832 r = -ENOTTY;
3833 }
3834 return r;
3835 }
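/*
 * Editor's note: an illustrative userspace-side sketch, not part of this
 * file, for two of the vCPU ioctls dispatched above: injecting an interrupt
 * with KVM_S390_IRQ and saving the pending local interrupts with
 * KVM_S390_GET_IRQ_STATE (e.g. for migration).  Structure layouts follow the
 * uapi headers; the vcpu_fd parameter and the caller-provided buffer are
 * assumptions made for the example.
 */
#include <linux/kvm.h>
#include <stddef.h>
#include <string.h>
#include <sys/ioctl.h>

/* Inject a restart interrupt, which carries no payload. */
static int inject_restart(int vcpu_fd)
{
	struct kvm_s390_irq irq;

	memset(&irq, 0, sizeof(irq));
	irq.type = KVM_S390_RESTART;
	return ioctl(vcpu_fd, KVM_S390_IRQ, &irq);
}

/*
 * Save the vCPU's pending local interrupts.  The kernel rejects a zero
 * length; a buffer sized like the VCPU_IRQS_MAX_BUF bound used by the SET
 * direction above should always be large enough.
 */
static int save_irq_state(int vcpu_fd, struct kvm_s390_irq *buf, size_t n)
{
	struct kvm_s390_irq_state irq_state;

	memset(&irq_state, 0, sizeof(irq_state));
	irq_state.buf = (unsigned long)buf;
	irq_state.len = n * sizeof(*buf);
	/* Returns the number of bytes copied, or -ENOBUFS if buf is too small. */
	return ioctl(vcpu_fd, KVM_S390_GET_IRQ_STATE, &irq_state);
}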
3836
3837 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3838 {
3839 #ifdef CONFIG_KVM_S390_UCONTROL
3840 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3841 && (kvm_is_ucontrol(vcpu->kvm))) {
3842 vmf->page = virt_to_page(vcpu->arch.sie_block);
3843 get_page(vmf->page);
3844 return 0;
3845 }
3846 #endif
3847 return VM_FAULT_SIGBUS;
3848 }
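/*
 * Editor's note: an illustrative userspace-side sketch, not part of this
 * file.  For user-controlled (CONFIG_KVM_S390_UCONTROL) VMs the fault
 * handler above hands out the vCPU's SIE control block page when the vCPU
 * fd is mmap()ed at page offset KVM_S390_SIE_PAGE_OFFSET.  The vcpu_fd
 * parameter and the chosen protection flags are assumptions for the example.
 */
#include <linux/kvm.h>
#include <sys/mman.h>
#include <unistd.h>

static void *map_sie_block(int vcpu_fd)
{
	long page_size = sysconf(_SC_PAGESIZE);

	/* The mmap offset is in bytes; KVM_S390_SIE_PAGE_OFFSET is a page index. */
	return mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
		    vcpu_fd, KVM_S390_SIE_PAGE_OFFSET * page_size);
}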
3849
3850 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3851 unsigned long npages)
3852 {
3853 return 0;
3854 }
3855
3856 /* Section: memory related */
3857 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3858 struct kvm_memory_slot *memslot,
3859 const struct kvm_userspace_memory_region *mem,
3860 enum kvm_mr_change change)
3861 {
3862 /* A few sanity checks. Memory slots have to start and end on a
3863 segment boundary (1 MB), i.e. the userspace address and the size
3864 must be segment aligned. The memory in userland may be fragmented
3865 into different vmas; mmap() and munmap() within this slot are fine at any time after this call. */
3866
3867 if (mem->userspace_addr & 0xffffful)
3868 return -EINVAL;
3869
3870 if (mem->memory_size & 0xffffful)
3871 return -EINVAL;
3872
3873 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3874 return -EINVAL;
3875
3876 return 0;
3877 }
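/*
 * Editor's note: an illustrative userspace-side sketch, not part of this
 * file, of a memslot registration that satisfies the checks above: both the
 * backing userspace address and the size are aligned to the 1 MB segment
 * boundary.  The vm_fd parameter, the slot number and the guest physical
 * base address are assumptions made for the example.
 */
#include <linux/kvm.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

#define SEG_SIZE	0x100000UL	/* 1 MB segment boundary */

/* size must be a multiple of SEG_SIZE, as required by the checks above. */
static int add_main_memory(int vm_fd, unsigned long size)
{
	struct kvm_userspace_memory_region region;
	void *mem;

	/* The backing memory must itself start on a 1 MB boundary. */
	if (posix_memalign(&mem, SEG_SIZE, size))
		return -1;

	memset(&region, 0, sizeof(region));
	region.slot            = 0;
	region.guest_phys_addr = 0;			/* start of guest memory */
	region.memory_size     = size;			/* segment aligned */
	region.userspace_addr  = (unsigned long)mem;	/* segment aligned */

	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}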
3878
3879 void kvm_arch_commit_memory_region(struct kvm *kvm,
3880 const struct kvm_userspace_memory_region *mem,
3881 const struct kvm_memory_slot *old,
3882 const struct kvm_memory_slot *new,
3883 enum kvm_mr_change change)
3884 {
3885 int rc;
3886
3887 /* If the basics of the memslot do not change, we do not want
3888 * to update the gmap. Every update causes several unnecessary
3889 * segment translation exceptions. This is usually handled just
3890 * fine by the normal fault handler + gmap, but it will also
3891 * cause faults on the prefix page of running guest CPUs.
3892 */
3893 if (old->userspace_addr == mem->userspace_addr &&
3894 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3895 old->npages * PAGE_SIZE == mem->memory_size)
3896 return;
3897
3898 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3899 mem->guest_phys_addr, mem->memory_size);
3900 if (rc)
3901 pr_warn("failed to commit memory region\n");
3902 return;
3903 }
3904
3905 static inline unsigned long nonhyp_mask(int i)
3906 {
3907 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3908
3909 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3910 }
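/*
 * Editor's note: worked values for nonhyp_mask() above.  The expression
 * extracts the i-th 2-bit field (counted from the most significant end) of
 * sclp.hmfai and uses it to shrink a 48-bit mask in 16-bit steps:
 *
 *   2-bit field value   returned mask
 *   0                   0x0000ffffffffffff
 *   1                   0x00000000ffffffff
 *   2                   0x000000000000ffff
 *   3                   0x0000000000000000
 *
 * kvm_s390_init() below ANDs this mask with each doubleword of the host's
 * facility list before ORing it into kvm_s390_fac_list_mask, so a larger
 * field value lets fewer of that doubleword's facility bits through.
 */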
3911
3912 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3913 {
3914 vcpu->valid_wakeup = false;
3915 }
3916
3917 static int __init kvm_s390_init(void)
3918 {
3919 int i;
3920
3921 if (!sclp.has_sief2) {
3922 pr_info("SIE not available\n");
3923 return -ENODEV;
3924 }
3925
3926 for (i = 0; i < 16; i++)
3927 kvm_s390_fac_list_mask[i] |=
3928 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3929
3930 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3931 }
3932
3933 static void __exit kvm_s390_exit(void)
3934 {
3935 kvm_exit();
3936 }
3937
3938 module_init(kvm_s390_init);
3939 module_exit(kvm_s390_exit);
3940
3941 /*
3942 * Enable autoloading of the kvm module.
3943 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3944 * since x86 takes a different approach.
3945 */
3946 #include <linux/miscdevice.h>
3947 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3948 MODULE_ALIAS("devname:kvm");
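/*
 * Editor's note: an illustrative userspace-side sketch, not part of this
 * file.  The misc-device and devname aliases above allow the first open of
 * /dev/kvm to autoload this module; a client is then expected to verify the
 * API version before creating a VM.  The open_kvm() helper name is an
 * assumption made for the example.
 */
#include <fcntl.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int open_kvm(void)
{
	int kvm_fd = open("/dev/kvm", O_RDWR | O_CLOEXEC);

	if (kvm_fd < 0)
		return -1;
	/* KVM_GET_API_VERSION is expected to return KVM_API_VERSION (12). */
	if (ioctl(kvm_fd, KVM_GET_API_VERSION, 0) != KVM_API_VERSION) {
		close(kvm_fd);
		return -1;
	}
	return kvm_fd;
}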