]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - arch/s390/kvm/kvm-s390.c
Merge branch 'for-4.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
[mirror_ubuntu-artful-kernel.git] / arch / s390 / kvm / kvm-s390.c
1 /*
2 * hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008, 2009
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
15 */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
34 #include <asm/stp.h>
35 #include <asm/pgtable.h>
36 #include <asm/gmap.h>
37 #include <asm/nmi.h>
38 #include <asm/switch_to.h>
39 #include <asm/isc.h>
40 #include <asm/sclp.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
43 #include "kvm-s390.h"
44 #include "gaccess.h"
45
46 #define KMSG_COMPONENT "kvm-s390"
47 #undef pr_fmt
48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
49
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53
54 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57 (KVM_MAX_VCPUS + LOCAL_IRQS))
58
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60
/*
 * Table of per-vcpu statistics counters. Each entry pairs a counter name
 * with the offset of the matching field in struct kvm_vcpu's "stat" member
 * and the KVM_STAT_VCPU kind (both produced by the VCPU_STAT() macro).
 * The list is terminated by the { NULL } sentinel entry.
 */
61 struct kvm_stats_debugfs_item debugfs_entries[] = {
62 { "userspace_handled", VCPU_STAT(exit_userspace) },
63 { "exit_null", VCPU_STAT(exit_null) },
64 { "exit_validity", VCPU_STAT(exit_validity) },
65 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
66 { "exit_external_request", VCPU_STAT(exit_external_request) },
67 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
68 { "exit_instruction", VCPU_STAT(exit_instruction) },
69 { "exit_pei", VCPU_STAT(exit_pei) },
70 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
71 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
72 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
73 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
74 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
75 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
76 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
77 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
78 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
79 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
80 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
81 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
82 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
83 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
84 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
85 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
86 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
87 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
88 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
89 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
90 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
91 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
92 { "instruction_spx", VCPU_STAT(instruction_spx) },
93 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
94 { "instruction_stap", VCPU_STAT(instruction_stap) },
95 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
96 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
97 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
98 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
99 { "instruction_essa", VCPU_STAT(instruction_essa) },
100 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
101 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
102 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
103 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
104 { "instruction_sie", VCPU_STAT(instruction_sie) },
105 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
106 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
107 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
108 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
109 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
110 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
111 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
112 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
113 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
114 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
115 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
116 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
117 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
118 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
119 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
120 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
121 { "diagnose_10", VCPU_STAT(diagnose_10) },
122 { "diagnose_44", VCPU_STAT(diagnose_44) },
123 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
124 { "diagnose_258", VCPU_STAT(diagnose_258) },
125 { "diagnose_308", VCPU_STAT(diagnose_308) },
126 { "diagnose_500", VCPU_STAT(diagnose_500) },
127 { NULL }
128 };
129
130 /* allow nested virtualization in KVM (if enabled by user space) */
131 static int nested;
132 module_param(nested, int, S_IRUGO);
133 MODULE_PARM_DESC(nested, "Nested virtualization support");
134
135 /* upper facilities limit for kvm */
136 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
137
138 unsigned long kvm_s390_fac_list_mask_size(void)
139 {
140 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
141 return ARRAY_SIZE(kvm_s390_fac_list_mask);
142 }
143
144 /* available cpu features supported by kvm */
145 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
146 /* available subfunctions indicated via query / "test bit" */
147 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
148
149 static struct gmap_notifier gmap_notifier;
150 static struct gmap_notifier vsie_gmap_notifier;
151 debug_info_t *kvm_s390_dbf;
152
153 /* Section: not file related */
/*
 * Nothing has to be switched on here: every s390 is virtualization
 * enabled, so this hook always reports success (0).
 */
int kvm_arch_hardware_enable(void)
{
	return 0;
}
159
160 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
161 unsigned long end);
162
163 /*
164 * This callback is executed during stop_machine(). All CPUs are therefore
165 * temporarily stopped. In order not to change guest behavior, we have to
166 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
167 * so a CPU won't be stopped while calculating with the epoch.
168 */
169 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
170 void *v)
171 {
172 struct kvm *kvm;
173 struct kvm_vcpu *vcpu;
174 int i;
175 unsigned long long *delta = v;
176
177 list_for_each_entry(kvm, &vm_list, vm_list) {
178 kvm->arch.epoch -= *delta;
179 kvm_for_each_vcpu(i, vcpu, kvm) {
180 vcpu->arch.sie_block->epoch -= *delta;
181 if (vcpu->arch.cputm_enabled)
182 vcpu->arch.cputm_start += *delta;
183 if (vcpu->arch.vsie_block)
184 vcpu->arch.vsie_block->epoch -= *delta;
185 }
186 }
187 return NOTIFY_OK;
188 }
189
/*
 * Notifier block that routes epoch delta notifications to kvm_clock_sync().
 * It is added to / removed from s390_epoch_delta_notifier by the hardware
 * setup / unsetup hooks in this file.
 */
190 static struct notifier_block kvm_clock_notifier = {
191 .notifier_call = kvm_clock_sync,
192 };
193
194 int kvm_arch_hardware_setup(void)
195 {
196 gmap_notifier.notifier_call = kvm_gmap_notifier;
197 gmap_register_pte_notifier(&gmap_notifier);
198 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
199 gmap_register_pte_notifier(&vsie_gmap_notifier);
200 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
201 &kvm_clock_notifier);
202 return 0;
203 }
204
205 void kvm_arch_hardware_unsetup(void)
206 {
207 gmap_unregister_pte_notifier(&gmap_notifier);
208 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
209 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
210 &kvm_clock_notifier);
211 }
212
213 static void allow_cpu_feat(unsigned long nr)
214 {
215 set_bit_inv(nr, kvm_s390_available_cpu_feat);
216 }
217
/*
 * Issue the PLO instruction for function code @nr and report whether it
 * completed with condition code 0.
 *
 * Register 0 is loaded with @nr ORed with 0x100 (presumably the flag that
 * selects the "test bit" form of the instruction - confirm against the
 * architecture documentation). The condition code is then extracted with
 * IPM and shifted down by 28 bits.
 *
 * Returns 1 if the condition code is 0, otherwise 0.
 */
218 static inline int plo_test_bit(unsigned char nr)
219 {
220 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
221 int cc;
222
223 asm volatile(
224 /* Parameter registers are ignored for "test bit" */
225 " plo 0,0,0,0(0)\n"
226 " ipm %0\n"
227 " srl %0,28\n"
228 : "=d" (cc)
229 : "d" (r0)
230 : "cc");
231 return cc == 0;
232 }
233
234 static void kvm_s390_cpu_feat_init(void)
235 {
236 int i;
237
238 for (i = 0; i < 256; ++i) {
239 if (plo_test_bit(i))
240 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
241 }
242
243 if (test_facility(28)) /* TOD-clock steering */
244 ptff(kvm_s390_available_subfunc.ptff,
245 sizeof(kvm_s390_available_subfunc.ptff),
246 PTFF_QAF);
247
248 if (test_facility(17)) { /* MSA */
249 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
250 kvm_s390_available_subfunc.kmac);
251 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
252 kvm_s390_available_subfunc.kmc);
253 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
254 kvm_s390_available_subfunc.km);
255 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
256 kvm_s390_available_subfunc.kimd);
257 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
258 kvm_s390_available_subfunc.klmd);
259 }
260 if (test_facility(76)) /* MSA3 */
261 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
262 kvm_s390_available_subfunc.pckmo);
263 if (test_facility(77)) { /* MSA4 */
264 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
265 kvm_s390_available_subfunc.kmctr);
266 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
267 kvm_s390_available_subfunc.kmf);
268 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
269 kvm_s390_available_subfunc.kmo);
270 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
271 kvm_s390_available_subfunc.pcc);
272 }
273 if (test_facility(57)) /* MSA5 */
274 __cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
275 kvm_s390_available_subfunc.ppno);
276
277 if (MACHINE_HAS_ESOP)
278 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
279 /*
280 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
281 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
282 */
283 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
284 !test_facility(3) || !nested)
285 return;
286 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
287 if (sclp.has_64bscao)
288 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
289 if (sclp.has_siif)
290 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
291 if (sclp.has_gpere)
292 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
293 if (sclp.has_gsls)
294 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
295 if (sclp.has_ib)
296 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
297 if (sclp.has_cei)
298 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
299 if (sclp.has_ibs)
300 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
301 /*
302 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
303 * all skey handling functions read/set the skey from the PGSTE
304 * instead of the real storage key.
305 *
306 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
307 * pages being detected as preserved although they are resident.
308 *
309 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
310 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
311 *
312 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
313 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
314 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
315 *
316 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
317 * cannot easily shadow the SCA because of the ipte lock.
318 */
319 }
320
321 int kvm_arch_init(void *opaque)
322 {
323 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
324 if (!kvm_s390_dbf)
325 return -ENOMEM;
326
327 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
328 debug_unregister(kvm_s390_dbf);
329 return -ENOMEM;
330 }
331
332 kvm_s390_cpu_feat_init();
333
334 /* Register floating interrupt controller interface. */
335 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
336 }
337
338 void kvm_arch_exit(void)
339 {
340 debug_unregister(kvm_s390_dbf);
341 }
342
343 /* Section: device related */
344 long kvm_arch_dev_ioctl(struct file *filp,
345 unsigned int ioctl, unsigned long arg)
346 {
347 if (ioctl == KVM_S390_ENABLE_SIE)
348 return s390_enable_sie();
349 return -EINVAL;
350 }
351
352 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
353 {
354 int r;
355
356 switch (ext) {
357 case KVM_CAP_S390_PSW:
358 case KVM_CAP_S390_GMAP:
359 case KVM_CAP_SYNC_MMU:
360 #ifdef CONFIG_KVM_S390_UCONTROL
361 case KVM_CAP_S390_UCONTROL:
362 #endif
363 case KVM_CAP_ASYNC_PF:
364 case KVM_CAP_SYNC_REGS:
365 case KVM_CAP_ONE_REG:
366 case KVM_CAP_ENABLE_CAP:
367 case KVM_CAP_S390_CSS_SUPPORT:
368 case KVM_CAP_IOEVENTFD:
369 case KVM_CAP_DEVICE_CTRL:
370 case KVM_CAP_ENABLE_CAP_VM:
371 case KVM_CAP_S390_IRQCHIP:
372 case KVM_CAP_VM_ATTRIBUTES:
373 case KVM_CAP_MP_STATE:
374 case KVM_CAP_IMMEDIATE_EXIT:
375 case KVM_CAP_S390_INJECT_IRQ:
376 case KVM_CAP_S390_USER_SIGP:
377 case KVM_CAP_S390_USER_STSI:
378 case KVM_CAP_S390_SKEYS:
379 case KVM_CAP_S390_IRQ_STATE:
380 case KVM_CAP_S390_USER_INSTR0:
381 r = 1;
382 break;
383 case KVM_CAP_S390_MEM_OP:
384 r = MEM_OP_MAX_SIZE;
385 break;
386 case KVM_CAP_NR_VCPUS:
387 case KVM_CAP_MAX_VCPUS:
388 r = KVM_S390_BSCA_CPU_SLOTS;
389 if (!kvm_s390_use_sca_entries())
390 r = KVM_MAX_VCPUS;
391 else if (sclp.has_esca && sclp.has_64bscao)
392 r = KVM_S390_ESCA_CPU_SLOTS;
393 break;
394 case KVM_CAP_NR_MEMSLOTS:
395 r = KVM_USER_MEM_SLOTS;
396 break;
397 case KVM_CAP_S390_COW:
398 r = MACHINE_HAS_ESOP;
399 break;
400 case KVM_CAP_S390_VECTOR_REGISTERS:
401 r = MACHINE_HAS_VX;
402 break;
403 case KVM_CAP_S390_RI:
404 r = test_facility(64);
405 break;
406 default:
407 r = 0;
408 }
409 return r;
410 }
411
412 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
413 struct kvm_memory_slot *memslot)
414 {
415 gfn_t cur_gfn, last_gfn;
416 unsigned long address;
417 struct gmap *gmap = kvm->arch.gmap;
418
419 /* Loop over all guest pages */
420 last_gfn = memslot->base_gfn + memslot->npages;
421 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
422 address = gfn_to_hva_memslot(memslot, cur_gfn);
423
424 if (test_and_clear_guest_dirty(gmap->mm, address))
425 mark_page_dirty(kvm, cur_gfn);
426 if (fatal_signal_pending(current))
427 return;
428 cond_resched();
429 }
430 }
431
432 /* Section: vm related */
433 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
434
435 /*
436 * Get (and clear) the dirty memory log for a memory slot.
437 */
438 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
439 struct kvm_dirty_log *log)
440 {
441 int r;
442 unsigned long n;
443 struct kvm_memslots *slots;
444 struct kvm_memory_slot *memslot;
445 int is_dirty = 0;
446
447 if (kvm_is_ucontrol(kvm))
448 return -EINVAL;
449
450 mutex_lock(&kvm->slots_lock);
451
452 r = -EINVAL;
453 if (log->slot >= KVM_USER_MEM_SLOTS)
454 goto out;
455
456 slots = kvm_memslots(kvm);
457 memslot = id_to_memslot(slots, log->slot);
458 r = -ENOENT;
459 if (!memslot->dirty_bitmap)
460 goto out;
461
462 kvm_s390_sync_dirty_log(kvm, memslot);
463 r = kvm_get_dirty_log(kvm, log, &is_dirty);
464 if (r)
465 goto out;
466
467 /* Clear the dirty log */
468 if (is_dirty) {
469 n = kvm_dirty_bitmap_bytes(memslot);
470 memset(memslot->dirty_bitmap, 0, n);
471 }
472 r = 0;
473 out:
474 mutex_unlock(&kvm->slots_lock);
475 return r;
476 }
477
478 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
479 {
480 unsigned int i;
481 struct kvm_vcpu *vcpu;
482
483 kvm_for_each_vcpu(i, vcpu, kvm) {
484 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
485 }
486 }
487
488 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
489 {
490 int r;
491
492 if (cap->flags)
493 return -EINVAL;
494
495 switch (cap->cap) {
496 case KVM_CAP_S390_IRQCHIP:
497 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
498 kvm->arch.use_irqchip = 1;
499 r = 0;
500 break;
501 case KVM_CAP_S390_USER_SIGP:
502 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
503 kvm->arch.user_sigp = 1;
504 r = 0;
505 break;
506 case KVM_CAP_S390_VECTOR_REGISTERS:
507 mutex_lock(&kvm->lock);
508 if (kvm->created_vcpus) {
509 r = -EBUSY;
510 } else if (MACHINE_HAS_VX) {
511 set_kvm_facility(kvm->arch.model.fac_mask, 129);
512 set_kvm_facility(kvm->arch.model.fac_list, 129);
513 if (test_facility(134)) {
514 set_kvm_facility(kvm->arch.model.fac_mask, 134);
515 set_kvm_facility(kvm->arch.model.fac_list, 134);
516 }
517 if (test_facility(135)) {
518 set_kvm_facility(kvm->arch.model.fac_mask, 135);
519 set_kvm_facility(kvm->arch.model.fac_list, 135);
520 }
521 r = 0;
522 } else
523 r = -EINVAL;
524 mutex_unlock(&kvm->lock);
525 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
526 r ? "(not available)" : "(success)");
527 break;
528 case KVM_CAP_S390_RI:
529 r = -EINVAL;
530 mutex_lock(&kvm->lock);
531 if (kvm->created_vcpus) {
532 r = -EBUSY;
533 } else if (test_facility(64)) {
534 set_kvm_facility(kvm->arch.model.fac_mask, 64);
535 set_kvm_facility(kvm->arch.model.fac_list, 64);
536 r = 0;
537 }
538 mutex_unlock(&kvm->lock);
539 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
540 r ? "(not available)" : "(success)");
541 break;
542 case KVM_CAP_S390_USER_STSI:
543 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
544 kvm->arch.user_stsi = 1;
545 r = 0;
546 break;
547 case KVM_CAP_S390_USER_INSTR0:
548 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
549 kvm->arch.user_instr0 = 1;
550 icpt_operexc_on_all_vcpus(kvm);
551 r = 0;
552 break;
553 default:
554 r = -EINVAL;
555 break;
556 }
557 return r;
558 }
559
560 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
561 {
562 int ret;
563
564 switch (attr->attr) {
565 case KVM_S390_VM_MEM_LIMIT_SIZE:
566 ret = 0;
567 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
568 kvm->arch.mem_limit);
569 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
570 ret = -EFAULT;
571 break;
572 default:
573 ret = -ENXIO;
574 break;
575 }
576 return ret;
577 }
578
579 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
580 {
581 int ret;
582 unsigned int idx;
583 switch (attr->attr) {
584 case KVM_S390_VM_MEM_ENABLE_CMMA:
585 ret = -ENXIO;
586 if (!sclp.has_cmma)
587 break;
588
589 ret = -EBUSY;
590 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
591 mutex_lock(&kvm->lock);
592 if (!kvm->created_vcpus) {
593 kvm->arch.use_cmma = 1;
594 ret = 0;
595 }
596 mutex_unlock(&kvm->lock);
597 break;
598 case KVM_S390_VM_MEM_CLR_CMMA:
599 ret = -ENXIO;
600 if (!sclp.has_cmma)
601 break;
602 ret = -EINVAL;
603 if (!kvm->arch.use_cmma)
604 break;
605
606 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
607 mutex_lock(&kvm->lock);
608 idx = srcu_read_lock(&kvm->srcu);
609 s390_reset_cmma(kvm->arch.gmap->mm);
610 srcu_read_unlock(&kvm->srcu, idx);
611 mutex_unlock(&kvm->lock);
612 ret = 0;
613 break;
614 case KVM_S390_VM_MEM_LIMIT_SIZE: {
615 unsigned long new_limit;
616
617 if (kvm_is_ucontrol(kvm))
618 return -EINVAL;
619
620 if (get_user(new_limit, (u64 __user *)attr->addr))
621 return -EFAULT;
622
623 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
624 new_limit > kvm->arch.mem_limit)
625 return -E2BIG;
626
627 if (!new_limit)
628 return -EINVAL;
629
630 /* gmap_create takes last usable address */
631 if (new_limit != KVM_S390_NO_MEM_LIMIT)
632 new_limit -= 1;
633
634 ret = -EBUSY;
635 mutex_lock(&kvm->lock);
636 if (!kvm->created_vcpus) {
637 /* gmap_create will round the limit up */
638 struct gmap *new = gmap_create(current->mm, new_limit);
639
640 if (!new) {
641 ret = -ENOMEM;
642 } else {
643 gmap_remove(kvm->arch.gmap);
644 new->private = kvm;
645 kvm->arch.gmap = new;
646 ret = 0;
647 }
648 }
649 mutex_unlock(&kvm->lock);
650 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
651 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
652 (void *) kvm->arch.gmap->asce);
653 break;
654 }
655 default:
656 ret = -ENXIO;
657 break;
658 }
659 return ret;
660 }
661
662 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
663
664 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
665 {
666 struct kvm_vcpu *vcpu;
667 int i;
668
669 if (!test_kvm_facility(kvm, 76))
670 return -EINVAL;
671
672 mutex_lock(&kvm->lock);
673 switch (attr->attr) {
674 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
675 get_random_bytes(
676 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
677 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
678 kvm->arch.crypto.aes_kw = 1;
679 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
680 break;
681 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
682 get_random_bytes(
683 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
684 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
685 kvm->arch.crypto.dea_kw = 1;
686 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
687 break;
688 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
689 kvm->arch.crypto.aes_kw = 0;
690 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
691 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
692 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
693 break;
694 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
695 kvm->arch.crypto.dea_kw = 0;
696 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
697 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
698 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
699 break;
700 default:
701 mutex_unlock(&kvm->lock);
702 return -ENXIO;
703 }
704
705 kvm_for_each_vcpu(i, vcpu, kvm) {
706 kvm_s390_vcpu_crypto_setup(vcpu);
707 exit_sie(vcpu);
708 }
709 mutex_unlock(&kvm->lock);
710 return 0;
711 }
712
713 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
714 {
715 u8 gtod_high;
716
717 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
718 sizeof(gtod_high)))
719 return -EFAULT;
720
721 if (gtod_high != 0)
722 return -EINVAL;
723 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
724
725 return 0;
726 }
727
728 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
729 {
730 u64 gtod;
731
732 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
733 return -EFAULT;
734
735 kvm_s390_set_tod_clock(kvm, gtod);
736 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
737 return 0;
738 }
739
740 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
741 {
742 int ret;
743
744 if (attr->flags)
745 return -EINVAL;
746
747 switch (attr->attr) {
748 case KVM_S390_VM_TOD_HIGH:
749 ret = kvm_s390_set_tod_high(kvm, attr);
750 break;
751 case KVM_S390_VM_TOD_LOW:
752 ret = kvm_s390_set_tod_low(kvm, attr);
753 break;
754 default:
755 ret = -ENXIO;
756 break;
757 }
758 return ret;
759 }
760
761 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
762 {
763 u8 gtod_high = 0;
764
765 if (copy_to_user((void __user *)attr->addr, &gtod_high,
766 sizeof(gtod_high)))
767 return -EFAULT;
768 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
769
770 return 0;
771 }
772
773 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
774 {
775 u64 gtod;
776
777 gtod = kvm_s390_get_tod_clock_fast(kvm);
778 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
779 return -EFAULT;
780 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
781
782 return 0;
783 }
784
785 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
786 {
787 int ret;
788
789 if (attr->flags)
790 return -EINVAL;
791
792 switch (attr->attr) {
793 case KVM_S390_VM_TOD_HIGH:
794 ret = kvm_s390_get_tod_high(kvm, attr);
795 break;
796 case KVM_S390_VM_TOD_LOW:
797 ret = kvm_s390_get_tod_low(kvm, attr);
798 break;
799 default:
800 ret = -ENXIO;
801 break;
802 }
803 return ret;
804 }
805
806 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
807 {
808 struct kvm_s390_vm_cpu_processor *proc;
809 u16 lowest_ibc, unblocked_ibc;
810 int ret = 0;
811
812 mutex_lock(&kvm->lock);
813 if (kvm->created_vcpus) {
814 ret = -EBUSY;
815 goto out;
816 }
817 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
818 if (!proc) {
819 ret = -ENOMEM;
820 goto out;
821 }
822 if (!copy_from_user(proc, (void __user *)attr->addr,
823 sizeof(*proc))) {
824 kvm->arch.model.cpuid = proc->cpuid;
825 lowest_ibc = sclp.ibc >> 16 & 0xfff;
826 unblocked_ibc = sclp.ibc & 0xfff;
827 if (lowest_ibc && proc->ibc) {
828 if (proc->ibc > unblocked_ibc)
829 kvm->arch.model.ibc = unblocked_ibc;
830 else if (proc->ibc < lowest_ibc)
831 kvm->arch.model.ibc = lowest_ibc;
832 else
833 kvm->arch.model.ibc = proc->ibc;
834 }
835 memcpy(kvm->arch.model.fac_list, proc->fac_list,
836 S390_ARCH_FAC_LIST_SIZE_BYTE);
837 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
838 kvm->arch.model.ibc,
839 kvm->arch.model.cpuid);
840 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
841 kvm->arch.model.fac_list[0],
842 kvm->arch.model.fac_list[1],
843 kvm->arch.model.fac_list[2]);
844 } else
845 ret = -EFAULT;
846 kfree(proc);
847 out:
848 mutex_unlock(&kvm->lock);
849 return ret;
850 }
851
852 static int kvm_s390_set_processor_feat(struct kvm *kvm,
853 struct kvm_device_attr *attr)
854 {
855 struct kvm_s390_vm_cpu_feat data;
856 int ret = -EBUSY;
857
858 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
859 return -EFAULT;
860 if (!bitmap_subset((unsigned long *) data.feat,
861 kvm_s390_available_cpu_feat,
862 KVM_S390_VM_CPU_FEAT_NR_BITS))
863 return -EINVAL;
864
865 mutex_lock(&kvm->lock);
866 if (!atomic_read(&kvm->online_vcpus)) {
867 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
868 KVM_S390_VM_CPU_FEAT_NR_BITS);
869 ret = 0;
870 }
871 mutex_unlock(&kvm->lock);
872 return ret;
873 }
874
/*
 * Configuring subfunctions is not supported; always returns -ENXIO.
 *
 * Once supported by kernel + hw, we have to store the subfunctions
 * in kvm->arch and remember that user space configured them.
 */
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	return -ENXIO;
}
884
885 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
886 {
887 int ret = -ENXIO;
888
889 switch (attr->attr) {
890 case KVM_S390_VM_CPU_PROCESSOR:
891 ret = kvm_s390_set_processor(kvm, attr);
892 break;
893 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
894 ret = kvm_s390_set_processor_feat(kvm, attr);
895 break;
896 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
897 ret = kvm_s390_set_processor_subfunc(kvm, attr);
898 break;
899 }
900 return ret;
901 }
902
903 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
904 {
905 struct kvm_s390_vm_cpu_processor *proc;
906 int ret = 0;
907
908 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
909 if (!proc) {
910 ret = -ENOMEM;
911 goto out;
912 }
913 proc->cpuid = kvm->arch.model.cpuid;
914 proc->ibc = kvm->arch.model.ibc;
915 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
916 S390_ARCH_FAC_LIST_SIZE_BYTE);
917 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
918 kvm->arch.model.ibc,
919 kvm->arch.model.cpuid);
920 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
921 kvm->arch.model.fac_list[0],
922 kvm->arch.model.fac_list[1],
923 kvm->arch.model.fac_list[2]);
924 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
925 ret = -EFAULT;
926 kfree(proc);
927 out:
928 return ret;
929 }
930
931 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
932 {
933 struct kvm_s390_vm_cpu_machine *mach;
934 int ret = 0;
935
936 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
937 if (!mach) {
938 ret = -ENOMEM;
939 goto out;
940 }
941 get_cpu_id((struct cpuid *) &mach->cpuid);
942 mach->ibc = sclp.ibc;
943 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
944 S390_ARCH_FAC_LIST_SIZE_BYTE);
945 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
946 sizeof(S390_lowcore.stfle_fac_list));
947 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
948 kvm->arch.model.ibc,
949 kvm->arch.model.cpuid);
950 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
951 mach->fac_mask[0],
952 mach->fac_mask[1],
953 mach->fac_mask[2]);
954 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
955 mach->fac_list[0],
956 mach->fac_list[1],
957 mach->fac_list[2]);
958 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
959 ret = -EFAULT;
960 kfree(mach);
961 out:
962 return ret;
963 }
964
965 static int kvm_s390_get_processor_feat(struct kvm *kvm,
966 struct kvm_device_attr *attr)
967 {
968 struct kvm_s390_vm_cpu_feat data;
969
970 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
971 KVM_S390_VM_CPU_FEAT_NR_BITS);
972 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
973 return -EFAULT;
974 return 0;
975 }
976
977 static int kvm_s390_get_machine_feat(struct kvm *kvm,
978 struct kvm_device_attr *attr)
979 {
980 struct kvm_s390_vm_cpu_feat data;
981
982 bitmap_copy((unsigned long *) data.feat,
983 kvm_s390_available_cpu_feat,
984 KVM_S390_VM_CPU_FEAT_NR_BITS);
985 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
986 return -EFAULT;
987 return 0;
988 }
989
/*
 * Reading configured subfunctions is not supported; always returns
 * -ENXIO.
 *
 * Once we can actually configure subfunctions (kernel + hw support),
 * we have to check if they were already set by user space, if so copy
 * them from kvm->arch.
 */
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	return -ENXIO;
}
1000
1001 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1002 struct kvm_device_attr *attr)
1003 {
1004 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1005 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1006 return -EFAULT;
1007 return 0;
1008 }
1009 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1010 {
1011 int ret = -ENXIO;
1012
1013 switch (attr->attr) {
1014 case KVM_S390_VM_CPU_PROCESSOR:
1015 ret = kvm_s390_get_processor(kvm, attr);
1016 break;
1017 case KVM_S390_VM_CPU_MACHINE:
1018 ret = kvm_s390_get_machine(kvm, attr);
1019 break;
1020 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1021 ret = kvm_s390_get_processor_feat(kvm, attr);
1022 break;
1023 case KVM_S390_VM_CPU_MACHINE_FEAT:
1024 ret = kvm_s390_get_machine_feat(kvm, attr);
1025 break;
1026 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1027 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1028 break;
1029 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1030 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1031 break;
1032 }
1033 return ret;
1034 }
1035
1036 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1037 {
1038 int ret;
1039
1040 switch (attr->group) {
1041 case KVM_S390_VM_MEM_CTRL:
1042 ret = kvm_s390_set_mem_control(kvm, attr);
1043 break;
1044 case KVM_S390_VM_TOD:
1045 ret = kvm_s390_set_tod(kvm, attr);
1046 break;
1047 case KVM_S390_VM_CPU_MODEL:
1048 ret = kvm_s390_set_cpu_model(kvm, attr);
1049 break;
1050 case KVM_S390_VM_CRYPTO:
1051 ret = kvm_s390_vm_set_crypto(kvm, attr);
1052 break;
1053 default:
1054 ret = -ENXIO;
1055 break;
1056 }
1057
1058 return ret;
1059 }
1060
1061 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1062 {
1063 int ret;
1064
1065 switch (attr->group) {
1066 case KVM_S390_VM_MEM_CTRL:
1067 ret = kvm_s390_get_mem_control(kvm, attr);
1068 break;
1069 case KVM_S390_VM_TOD:
1070 ret = kvm_s390_get_tod(kvm, attr);
1071 break;
1072 case KVM_S390_VM_CPU_MODEL:
1073 ret = kvm_s390_get_cpu_model(kvm, attr);
1074 break;
1075 default:
1076 ret = -ENXIO;
1077 break;
1078 }
1079
1080 return ret;
1081 }
1082
1083 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1084 {
1085 int ret;
1086
1087 switch (attr->group) {
1088 case KVM_S390_VM_MEM_CTRL:
1089 switch (attr->attr) {
1090 case KVM_S390_VM_MEM_ENABLE_CMMA:
1091 case KVM_S390_VM_MEM_CLR_CMMA:
1092 ret = sclp.has_cmma ? 0 : -ENXIO;
1093 break;
1094 case KVM_S390_VM_MEM_LIMIT_SIZE:
1095 ret = 0;
1096 break;
1097 default:
1098 ret = -ENXIO;
1099 break;
1100 }
1101 break;
1102 case KVM_S390_VM_TOD:
1103 switch (attr->attr) {
1104 case KVM_S390_VM_TOD_LOW:
1105 case KVM_S390_VM_TOD_HIGH:
1106 ret = 0;
1107 break;
1108 default:
1109 ret = -ENXIO;
1110 break;
1111 }
1112 break;
1113 case KVM_S390_VM_CPU_MODEL:
1114 switch (attr->attr) {
1115 case KVM_S390_VM_CPU_PROCESSOR:
1116 case KVM_S390_VM_CPU_MACHINE:
1117 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1118 case KVM_S390_VM_CPU_MACHINE_FEAT:
1119 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1120 ret = 0;
1121 break;
1122 /* configuring subfunctions is not supported yet */
1123 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1124 default:
1125 ret = -ENXIO;
1126 break;
1127 }
1128 break;
1129 case KVM_S390_VM_CRYPTO:
1130 switch (attr->attr) {
1131 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1132 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1133 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1134 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1135 ret = 0;
1136 break;
1137 default:
1138 ret = -ENXIO;
1139 break;
1140 }
1141 break;
1142 default:
1143 ret = -ENXIO;
1144 break;
1145 }
1146
1147 return ret;
1148 }
1149
1150 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1151 {
1152 uint8_t *keys;
1153 uint64_t hva;
1154 int i, r = 0;
1155
1156 if (args->flags != 0)
1157 return -EINVAL;
1158
1159 /* Is this guest using storage keys? */
1160 if (!mm_use_skey(current->mm))
1161 return KVM_S390_GET_SKEYS_NONE;
1162
1163 /* Enforce sane limit on memory allocation */
1164 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1165 return -EINVAL;
1166
1167 keys = kmalloc_array(args->count, sizeof(uint8_t),
1168 GFP_KERNEL | __GFP_NOWARN);
1169 if (!keys)
1170 keys = vmalloc(sizeof(uint8_t) * args->count);
1171 if (!keys)
1172 return -ENOMEM;
1173
1174 down_read(&current->mm->mmap_sem);
1175 for (i = 0; i < args->count; i++) {
1176 hva = gfn_to_hva(kvm, args->start_gfn + i);
1177 if (kvm_is_error_hva(hva)) {
1178 r = -EFAULT;
1179 break;
1180 }
1181
1182 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1183 if (r)
1184 break;
1185 }
1186 up_read(&current->mm->mmap_sem);
1187
1188 if (!r) {
1189 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1190 sizeof(uint8_t) * args->count);
1191 if (r)
1192 r = -EFAULT;
1193 }
1194
1195 kvfree(keys);
1196 return r;
1197 }
1198
1199 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1200 {
1201 uint8_t *keys;
1202 uint64_t hva;
1203 int i, r = 0;
1204
1205 if (args->flags != 0)
1206 return -EINVAL;
1207
1208 /* Enforce sane limit on memory allocation */
1209 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1210 return -EINVAL;
1211
1212 keys = kmalloc_array(args->count, sizeof(uint8_t),
1213 GFP_KERNEL | __GFP_NOWARN);
1214 if (!keys)
1215 keys = vmalloc(sizeof(uint8_t) * args->count);
1216 if (!keys)
1217 return -ENOMEM;
1218
1219 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1220 sizeof(uint8_t) * args->count);
1221 if (r) {
1222 r = -EFAULT;
1223 goto out;
1224 }
1225
1226 /* Enable storage key handling for the guest */
1227 r = s390_enable_skey();
1228 if (r)
1229 goto out;
1230
1231 down_read(&current->mm->mmap_sem);
1232 for (i = 0; i < args->count; i++) {
1233 hva = gfn_to_hva(kvm, args->start_gfn + i);
1234 if (kvm_is_error_hva(hva)) {
1235 r = -EFAULT;
1236 break;
1237 }
1238
1239 /* Lowest order bit is reserved */
1240 if (keys[i] & 0x01) {
1241 r = -EINVAL;
1242 break;
1243 }
1244
1245 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1246 if (r)
1247 break;
1248 }
1249 up_read(&current->mm->mmap_sem);
1250 out:
1251 kvfree(keys);
1252 return r;
1253 }
1254
1255 long kvm_arch_vm_ioctl(struct file *filp,
1256 unsigned int ioctl, unsigned long arg)
1257 {
1258 struct kvm *kvm = filp->private_data;
1259 void __user *argp = (void __user *)arg;
1260 struct kvm_device_attr attr;
1261 int r;
1262
1263 switch (ioctl) {
1264 case KVM_S390_INTERRUPT: {
1265 struct kvm_s390_interrupt s390int;
1266
1267 r = -EFAULT;
1268 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1269 break;
1270 r = kvm_s390_inject_vm(kvm, &s390int);
1271 break;
1272 }
1273 case KVM_ENABLE_CAP: {
1274 struct kvm_enable_cap cap;
1275 r = -EFAULT;
1276 if (copy_from_user(&cap, argp, sizeof(cap)))
1277 break;
1278 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1279 break;
1280 }
1281 case KVM_CREATE_IRQCHIP: {
1282 struct kvm_irq_routing_entry routing;
1283
1284 r = -EINVAL;
1285 if (kvm->arch.use_irqchip) {
1286 /* Set up dummy routing. */
1287 memset(&routing, 0, sizeof(routing));
1288 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1289 }
1290 break;
1291 }
1292 case KVM_SET_DEVICE_ATTR: {
1293 r = -EFAULT;
1294 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1295 break;
1296 r = kvm_s390_vm_set_attr(kvm, &attr);
1297 break;
1298 }
1299 case KVM_GET_DEVICE_ATTR: {
1300 r = -EFAULT;
1301 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1302 break;
1303 r = kvm_s390_vm_get_attr(kvm, &attr);
1304 break;
1305 }
1306 case KVM_HAS_DEVICE_ATTR: {
1307 r = -EFAULT;
1308 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1309 break;
1310 r = kvm_s390_vm_has_attr(kvm, &attr);
1311 break;
1312 }
1313 case KVM_S390_GET_SKEYS: {
1314 struct kvm_s390_skeys args;
1315
1316 r = -EFAULT;
1317 if (copy_from_user(&args, argp,
1318 sizeof(struct kvm_s390_skeys)))
1319 break;
1320 r = kvm_s390_get_skeys(kvm, &args);
1321 break;
1322 }
1323 case KVM_S390_SET_SKEYS: {
1324 struct kvm_s390_skeys args;
1325
1326 r = -EFAULT;
1327 if (copy_from_user(&args, argp,
1328 sizeof(struct kvm_s390_skeys)))
1329 break;
1330 r = kvm_s390_set_skeys(kvm, &args);
1331 break;
1332 }
1333 default:
1334 r = -ENOTTY;
1335 }
1336
1337 return r;
1338 }
1339
1340 static int kvm_s390_query_ap_config(u8 *config)
1341 {
1342 u32 fcn_code = 0x04000000UL;
1343 u32 cc = 0;
1344
1345 memset(config, 0, 128);
1346 asm volatile(
1347 "lgr 0,%1\n"
1348 "lgr 2,%2\n"
1349 ".long 0xb2af0000\n" /* PQAP(QCI) */
1350 "0: ipm %0\n"
1351 "srl %0,28\n"
1352 "1:\n"
1353 EX_TABLE(0b, 1b)
1354 : "+r" (cc)
1355 : "r" (fcn_code), "r" (config)
1356 : "cc", "0", "2", "memory"
1357 );
1358
1359 return cc;
1360 }
1361
1362 static int kvm_s390_apxa_installed(void)
1363 {
1364 u8 config[128];
1365 int cc;
1366
1367 if (test_facility(12)) {
1368 cc = kvm_s390_query_ap_config(config);
1369
1370 if (cc)
1371 pr_err("PQAP(QCI) failed with cc=%d", cc);
1372 else
1373 return config[0] & 0x40;
1374 }
1375
1376 return 0;
1377 }
1378
1379 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1380 {
1381 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1382
1383 if (kvm_s390_apxa_installed())
1384 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1385 else
1386 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1387 }
1388
1389 static u64 kvm_s390_get_initial_cpuid(void)
1390 {
1391 struct cpuid cpuid;
1392
1393 get_cpu_id(&cpuid);
1394 cpuid.version = 0xff;
1395 return *((u64 *) &cpuid);
1396 }
1397
1398 static void kvm_s390_crypto_init(struct kvm *kvm)
1399 {
1400 if (!test_kvm_facility(kvm, 76))
1401 return;
1402
1403 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1404 kvm_s390_set_crycb_format(kvm);
1405
1406 /* Enable AES/DEA protected key functions by default */
1407 kvm->arch.crypto.aes_kw = 1;
1408 kvm->arch.crypto.dea_kw = 1;
1409 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1410 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1411 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1412 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1413 }
1414
1415 static void sca_dispose(struct kvm *kvm)
1416 {
1417 if (kvm->arch.use_esca)
1418 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1419 else
1420 free_page((unsigned long)(kvm->arch.sca));
1421 kvm->arch.sca = NULL;
1422 }
1423
1424 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1425 {
1426 gfp_t alloc_flags = GFP_KERNEL;
1427 int i, rc;
1428 char debug_name[16];
1429 static unsigned long sca_offset;
1430
1431 rc = -EINVAL;
1432 #ifdef CONFIG_KVM_S390_UCONTROL
1433 if (type & ~KVM_VM_S390_UCONTROL)
1434 goto out_err;
1435 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1436 goto out_err;
1437 #else
1438 if (type)
1439 goto out_err;
1440 #endif
1441
1442 rc = s390_enable_sie();
1443 if (rc)
1444 goto out_err;
1445
1446 rc = -ENOMEM;
1447
1448 ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1449
1450 kvm->arch.use_esca = 0; /* start with basic SCA */
1451 if (!sclp.has_64bscao)
1452 alloc_flags |= GFP_DMA;
1453 rwlock_init(&kvm->arch.sca_lock);
1454 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1455 if (!kvm->arch.sca)
1456 goto out_err;
1457 spin_lock(&kvm_lock);
1458 sca_offset += 16;
1459 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1460 sca_offset = 0;
1461 kvm->arch.sca = (struct bsca_block *)
1462 ((char *) kvm->arch.sca + sca_offset);
1463 spin_unlock(&kvm_lock);
1464
1465 sprintf(debug_name, "kvm-%u", current->pid);
1466
1467 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1468 if (!kvm->arch.dbf)
1469 goto out_err;
1470
1471 kvm->arch.sie_page2 =
1472 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1473 if (!kvm->arch.sie_page2)
1474 goto out_err;
1475
1476 /* Populate the facility mask initially. */
1477 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1478 sizeof(S390_lowcore.stfle_fac_list));
1479 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1480 if (i < kvm_s390_fac_list_mask_size())
1481 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1482 else
1483 kvm->arch.model.fac_mask[i] = 0UL;
1484 }
1485
1486 /* Populate the facility list initially. */
1487 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1488 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1489 S390_ARCH_FAC_LIST_SIZE_BYTE);
1490
1491 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1492 set_kvm_facility(kvm->arch.model.fac_list, 74);
1493
1494 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1495 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1496
1497 kvm_s390_crypto_init(kvm);
1498
1499 spin_lock_init(&kvm->arch.float_int.lock);
1500 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1501 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1502 init_waitqueue_head(&kvm->arch.ipte_wq);
1503 mutex_init(&kvm->arch.ipte_mutex);
1504
1505 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1506 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1507
1508 if (type & KVM_VM_S390_UCONTROL) {
1509 kvm->arch.gmap = NULL;
1510 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1511 } else {
1512 if (sclp.hamax == U64_MAX)
1513 kvm->arch.mem_limit = TASK_MAX_SIZE;
1514 else
1515 kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1516 sclp.hamax + 1);
1517 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1518 if (!kvm->arch.gmap)
1519 goto out_err;
1520 kvm->arch.gmap->private = kvm;
1521 kvm->arch.gmap->pfault_enabled = 0;
1522 }
1523
1524 kvm->arch.css_support = 0;
1525 kvm->arch.use_irqchip = 0;
1526 kvm->arch.epoch = 0;
1527
1528 spin_lock_init(&kvm->arch.start_stop_lock);
1529 kvm_s390_vsie_init(kvm);
1530 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1531
1532 return 0;
1533 out_err:
1534 free_page((unsigned long)kvm->arch.sie_page2);
1535 debug_unregister(kvm->arch.dbf);
1536 sca_dispose(kvm);
1537 KVM_EVENT(3, "creation of vm failed: %d", rc);
1538 return rc;
1539 }
1540
/* This architecture reports no per-vcpu debugfs support: always false. */
bool kvm_arch_has_vcpu_debugfs(void)
{
	bool has_debugfs = false;

	return has_debugfs;
}
1545
/* Nothing to create for a vcpu; always reports success (0). */
int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	return rc;
}
1550
1551 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1552 {
1553 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1554 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1555 kvm_s390_clear_local_irqs(vcpu);
1556 kvm_clear_async_pf_completion_queue(vcpu);
1557 if (!kvm_is_ucontrol(vcpu->kvm))
1558 sca_del_vcpu(vcpu);
1559
1560 if (kvm_is_ucontrol(vcpu->kvm))
1561 gmap_remove(vcpu->arch.gmap);
1562
1563 if (vcpu->kvm->arch.use_cmma)
1564 kvm_s390_vcpu_unsetup_cmma(vcpu);
1565 free_page((unsigned long)(vcpu->arch.sie_block));
1566
1567 kvm_vcpu_uninit(vcpu);
1568 kmem_cache_free(kvm_vcpu_cache, vcpu);
1569 }
1570
1571 static void kvm_free_vcpus(struct kvm *kvm)
1572 {
1573 unsigned int i;
1574 struct kvm_vcpu *vcpu;
1575
1576 kvm_for_each_vcpu(i, vcpu, kvm)
1577 kvm_arch_vcpu_destroy(vcpu);
1578
1579 mutex_lock(&kvm->lock);
1580 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1581 kvm->vcpus[i] = NULL;
1582
1583 atomic_set(&kvm->online_vcpus, 0);
1584 mutex_unlock(&kvm->lock);
1585 }
1586
1587 void kvm_arch_destroy_vm(struct kvm *kvm)
1588 {
1589 kvm_free_vcpus(kvm);
1590 sca_dispose(kvm);
1591 debug_unregister(kvm->arch.dbf);
1592 free_page((unsigned long)kvm->arch.sie_page2);
1593 if (!kvm_is_ucontrol(kvm))
1594 gmap_remove(kvm->arch.gmap);
1595 kvm_s390_destroy_adapters(kvm);
1596 kvm_s390_clear_float_irqs(kvm);
1597 kvm_s390_vsie_destroy(kvm);
1598 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1599 }
1600
1601 /* Section: vcpu related */
1602 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1603 {
1604 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1605 if (!vcpu->arch.gmap)
1606 return -ENOMEM;
1607 vcpu->arch.gmap->private = vcpu->kvm;
1608
1609 return 0;
1610 }
1611
1612 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1613 {
1614 if (!kvm_s390_use_sca_entries())
1615 return;
1616 read_lock(&vcpu->kvm->arch.sca_lock);
1617 if (vcpu->kvm->arch.use_esca) {
1618 struct esca_block *sca = vcpu->kvm->arch.sca;
1619
1620 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1621 sca->cpu[vcpu->vcpu_id].sda = 0;
1622 } else {
1623 struct bsca_block *sca = vcpu->kvm->arch.sca;
1624
1625 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1626 sca->cpu[vcpu->vcpu_id].sda = 0;
1627 }
1628 read_unlock(&vcpu->kvm->arch.sca_lock);
1629 }
1630
1631 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1632 {
1633 if (!kvm_s390_use_sca_entries()) {
1634 struct bsca_block *sca = vcpu->kvm->arch.sca;
1635
1636 /* we still need the basic sca for the ipte control */
1637 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1638 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1639 }
1640 read_lock(&vcpu->kvm->arch.sca_lock);
1641 if (vcpu->kvm->arch.use_esca) {
1642 struct esca_block *sca = vcpu->kvm->arch.sca;
1643
1644 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1645 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1646 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1647 vcpu->arch.sie_block->ecb2 |= 0x04U;
1648 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1649 } else {
1650 struct bsca_block *sca = vcpu->kvm->arch.sca;
1651
1652 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1653 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1654 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1655 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1656 }
1657 read_unlock(&vcpu->kvm->arch.sca_lock);
1658 }
1659
1660 /* Basic SCA to Extended SCA data copy routines */
1661 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1662 {
1663 d->sda = s->sda;
1664 d->sigp_ctrl.c = s->sigp_ctrl.c;
1665 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1666 }
1667
1668 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1669 {
1670 int i;
1671
1672 d->ipte_control = s->ipte_control;
1673 d->mcn[0] = s->mcn;
1674 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1675 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1676 }
1677
1678 static int sca_switch_to_extended(struct kvm *kvm)
1679 {
1680 struct bsca_block *old_sca = kvm->arch.sca;
1681 struct esca_block *new_sca;
1682 struct kvm_vcpu *vcpu;
1683 unsigned int vcpu_idx;
1684 u32 scaol, scaoh;
1685
1686 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1687 if (!new_sca)
1688 return -ENOMEM;
1689
1690 scaoh = (u32)((u64)(new_sca) >> 32);
1691 scaol = (u32)(u64)(new_sca) & ~0x3fU;
1692
1693 kvm_s390_vcpu_block_all(kvm);
1694 write_lock(&kvm->arch.sca_lock);
1695
1696 sca_copy_b_to_e(new_sca, old_sca);
1697
1698 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1699 vcpu->arch.sie_block->scaoh = scaoh;
1700 vcpu->arch.sie_block->scaol = scaol;
1701 vcpu->arch.sie_block->ecb2 |= 0x04U;
1702 }
1703 kvm->arch.sca = new_sca;
1704 kvm->arch.use_esca = 1;
1705
1706 write_unlock(&kvm->arch.sca_lock);
1707 kvm_s390_vcpu_unblock_all(kvm);
1708
1709 free_page((unsigned long)old_sca);
1710
1711 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1712 old_sca, kvm->arch.sca);
1713 return 0;
1714 }
1715
1716 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1717 {
1718 int rc;
1719
1720 if (!kvm_s390_use_sca_entries()) {
1721 if (id < KVM_MAX_VCPUS)
1722 return true;
1723 return false;
1724 }
1725 if (id < KVM_S390_BSCA_CPU_SLOTS)
1726 return true;
1727 if (!sclp.has_esca || !sclp.has_64bscao)
1728 return false;
1729
1730 mutex_lock(&kvm->lock);
1731 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1732 mutex_unlock(&kvm->lock);
1733
1734 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1735 }
1736
1737 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1738 {
1739 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1740 kvm_clear_async_pf_completion_queue(vcpu);
1741 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1742 KVM_SYNC_GPRS |
1743 KVM_SYNC_ACRS |
1744 KVM_SYNC_CRS |
1745 KVM_SYNC_ARCH0 |
1746 KVM_SYNC_PFAULT;
1747 kvm_s390_set_prefix(vcpu, 0);
1748 if (test_kvm_facility(vcpu->kvm, 64))
1749 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1750 /* fprs can be synchronized via vrs, even if the guest has no vx. With
1751 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1752 */
1753 if (MACHINE_HAS_VX)
1754 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1755 else
1756 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1757
1758 if (kvm_is_ucontrol(vcpu->kvm))
1759 return __kvm_ucontrol_vcpu_init(vcpu);
1760
1761 return 0;
1762 }
1763
1764 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1765 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1766 {
1767 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1768 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1769 vcpu->arch.cputm_start = get_tod_clock_fast();
1770 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1771 }
1772
1773 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1774 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1775 {
1776 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1777 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1778 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1779 vcpu->arch.cputm_start = 0;
1780 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1781 }
1782
1783 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1784 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1785 {
1786 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1787 vcpu->arch.cputm_enabled = true;
1788 __start_cpu_timer_accounting(vcpu);
1789 }
1790
1791 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1792 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1793 {
1794 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1795 __stop_cpu_timer_accounting(vcpu);
1796 vcpu->arch.cputm_enabled = false;
1797 }
1798
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
1805
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
1812
1813 /* set the cpu timer - may only be called from the VCPU thread itself */
1814 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1815 {
1816 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1817 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1818 if (vcpu->arch.cputm_enabled)
1819 vcpu->arch.cputm_start = get_tod_clock_fast();
1820 vcpu->arch.sie_block->cputm = cputm;
1821 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1822 preempt_enable();
1823 }
1824
1825 /* update and get the cpu timer - can also be called from other VCPU threads */
1826 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1827 {
1828 unsigned int seq;
1829 __u64 value;
1830
1831 if (unlikely(!vcpu->arch.cputm_enabled))
1832 return vcpu->arch.sie_block->cputm;
1833
1834 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1835 do {
1836 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1837 /*
1838 * If the writer would ever execute a read in the critical
1839 * section, e.g. in irq context, we have a deadlock.
1840 */
1841 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1842 value = vcpu->arch.sie_block->cputm;
1843 /* if cputm_start is 0, accounting is being started/stopped */
1844 if (likely(vcpu->arch.cputm_start))
1845 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1846 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1847 preempt_enable();
1848 return value;
1849 }
1850
1851 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1852 {
1853
1854 gmap_enable(vcpu->arch.enabled_gmap);
1855 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1856 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1857 __start_cpu_timer_accounting(vcpu);
1858 vcpu->cpu = cpu;
1859 }
1860
1861 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1862 {
1863 vcpu->cpu = -1;
1864 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1865 __stop_cpu_timer_accounting(vcpu);
1866 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1867 vcpu->arch.enabled_gmap = gmap_get_enabled();
1868 gmap_disable(vcpu->arch.enabled_gmap);
1869
1870 }
1871
1872 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1873 {
1874 /* this equals initial cpu reset in pop, but we don't switch to ESA */
1875 vcpu->arch.sie_block->gpsw.mask = 0UL;
1876 vcpu->arch.sie_block->gpsw.addr = 0UL;
1877 kvm_s390_set_prefix(vcpu, 0);
1878 kvm_s390_set_cpu_timer(vcpu, 0);
1879 vcpu->arch.sie_block->ckc = 0UL;
1880 vcpu->arch.sie_block->todpr = 0;
1881 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1882 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
1883 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1884 /* make sure the new fpc will be lazily loaded */
1885 save_fpu_regs();
1886 current->thread.fpu.fpc = 0;
1887 vcpu->arch.sie_block->gbea = 1;
1888 vcpu->arch.sie_block->pp = 0;
1889 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1890 kvm_clear_async_pf_completion_queue(vcpu);
1891 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1892 kvm_s390_vcpu_stop(vcpu);
1893 kvm_s390_clear_local_irqs(vcpu);
1894 }
1895
1896 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1897 {
1898 mutex_lock(&vcpu->kvm->lock);
1899 preempt_disable();
1900 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1901 preempt_enable();
1902 mutex_unlock(&vcpu->kvm->lock);
1903 if (!kvm_is_ucontrol(vcpu->kvm)) {
1904 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1905 sca_add_vcpu(vcpu);
1906 }
1907 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1908 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1909 /* make vcpu_load load the right gmap on the first trigger */
1910 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1911 }
1912
1913 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1914 {
1915 if (!test_kvm_facility(vcpu->kvm, 76))
1916 return;
1917
1918 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1919
1920 if (vcpu->kvm->arch.crypto.aes_kw)
1921 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1922 if (vcpu->kvm->arch.crypto.dea_kw)
1923 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1924
1925 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1926 }
1927
1928 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1929 {
1930 free_page(vcpu->arch.sie_block->cbrlo);
1931 vcpu->arch.sie_block->cbrlo = 0;
1932 }
1933
1934 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1935 {
1936 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1937 if (!vcpu->arch.sie_block->cbrlo)
1938 return -ENOMEM;
1939
1940 vcpu->arch.sie_block->ecb2 |= 0x80;
1941 vcpu->arch.sie_block->ecb2 &= ~0x08;
1942 return 0;
1943 }
1944
1945 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1946 {
1947 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1948
1949 vcpu->arch.sie_block->ibc = model->ibc;
1950 if (test_kvm_facility(vcpu->kvm, 7))
1951 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1952 }
1953
1954 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1955 {
1956 int rc = 0;
1957
1958 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1959 CPUSTAT_SM |
1960 CPUSTAT_STOPPED);
1961
1962 if (test_kvm_facility(vcpu->kvm, 78))
1963 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1964 else if (test_kvm_facility(vcpu->kvm, 8))
1965 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1966
1967 kvm_s390_vcpu_setup_model(vcpu);
1968
1969 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1970 if (MACHINE_HAS_ESOP)
1971 vcpu->arch.sie_block->ecb |= 0x02;
1972 if (test_kvm_facility(vcpu->kvm, 9))
1973 vcpu->arch.sie_block->ecb |= 0x04;
1974 if (test_kvm_facility(vcpu->kvm, 73))
1975 vcpu->arch.sie_block->ecb |= 0x10;
1976
1977 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1978 vcpu->arch.sie_block->ecb2 |= 0x08;
1979 if (test_kvm_facility(vcpu->kvm, 130))
1980 vcpu->arch.sie_block->ecb2 |= 0x20;
1981 vcpu->arch.sie_block->eca = 0x1002000U;
1982 if (sclp.has_cei)
1983 vcpu->arch.sie_block->eca |= 0x80000000U;
1984 if (sclp.has_ib)
1985 vcpu->arch.sie_block->eca |= 0x40000000U;
1986 if (sclp.has_siif)
1987 vcpu->arch.sie_block->eca |= 1;
1988 if (sclp.has_sigpif)
1989 vcpu->arch.sie_block->eca |= 0x10000000U;
1990 if (test_kvm_facility(vcpu->kvm, 129)) {
1991 vcpu->arch.sie_block->eca |= 0x00020000;
1992 vcpu->arch.sie_block->ecd |= 0x20000000;
1993 }
1994 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1995 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1996
1997 if (vcpu->kvm->arch.use_cmma) {
1998 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1999 if (rc)
2000 return rc;
2001 }
2002 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2003 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2004
2005 kvm_s390_vcpu_crypto_setup(vcpu);
2006
2007 return rc;
2008 }
2009
2010 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2011 unsigned int id)
2012 {
2013 struct kvm_vcpu *vcpu;
2014 struct sie_page *sie_page;
2015 int rc = -EINVAL;
2016
2017 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2018 goto out;
2019
2020 rc = -ENOMEM;
2021
2022 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2023 if (!vcpu)
2024 goto out;
2025
2026 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2027 if (!sie_page)
2028 goto out_free_cpu;
2029
2030 vcpu->arch.sie_block = &sie_page->sie_block;
2031 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2032
2033 /* the real guest size will always be smaller than msl */
2034 vcpu->arch.sie_block->mso = 0;
2035 vcpu->arch.sie_block->msl = sclp.hamax;
2036
2037 vcpu->arch.sie_block->icpua = id;
2038 spin_lock_init(&vcpu->arch.local_int.lock);
2039 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2040 vcpu->arch.local_int.wq = &vcpu->wq;
2041 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2042 seqcount_init(&vcpu->arch.cputm_seqcount);
2043
2044 rc = kvm_vcpu_init(vcpu, kvm, id);
2045 if (rc)
2046 goto out_free_sie_block;
2047 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2048 vcpu->arch.sie_block);
2049 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2050
2051 return vcpu;
2052 out_free_sie_block:
2053 free_page((unsigned long)(vcpu->arch.sie_block));
2054 out_free_cpu:
2055 kmem_cache_free(kvm_vcpu_cache, vcpu);
2056 out:
2057 return ERR_PTR(rc);
2058 }
2059
/* Returns the result of kvm_s390_vcpu_has_irq(vcpu, 0). */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

	return has_irq;
}
2064
2065 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2066 {
2067 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2068 exit_sie(vcpu);
2069 }
2070
2071 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2072 {
2073 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2074 }
2075
2076 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2077 {
2078 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2079 exit_sie(vcpu);
2080 }
2081
2082 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2083 {
2084 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2085 }
2086
2087 /*
2088 * Kick a guest cpu out of SIE and wait until SIE is not running.
2089 * If the CPU is not running (e.g. waiting as idle) the function will
2090 * return immediately. */
2091 void exit_sie(struct kvm_vcpu *vcpu)
2092 {
2093 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2094 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2095 cpu_relax();
2096 }
2097
/*
 * Kick a guest cpu out of SIE to process a request synchronously: the
 * request is queued on the vcpu first, then the vcpu is kicked via
 * kvm_s390_vcpu_request().
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	/* the request has to be visible before the vcpu is kicked */
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
2104
2105 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2106 unsigned long end)
2107 {
2108 struct kvm *kvm = gmap->private;
2109 struct kvm_vcpu *vcpu;
2110 unsigned long prefix;
2111 int i;
2112
2113 if (gmap_is_shadow(gmap))
2114 return;
2115 if (start >= 1UL << 31)
2116 /* We are only interested in prefix pages */
2117 return;
2118 kvm_for_each_vcpu(i, vcpu, kvm) {
2119 /* match against both prefix pages */
2120 prefix = kvm_s390_get_prefix(vcpu);
2121 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2122 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2123 start, end);
2124 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2125 }
2126 }
2127 }
2128
/*
 * Must never be called: kvm common code refers to this function, but
 * never calls it.  Calling it anyway hits BUG().
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();

	/* not reached; keeps the non-void function well formed */
	return 0;
}
2135
2136 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2137 struct kvm_one_reg *reg)
2138 {
2139 int r = -EINVAL;
2140
2141 switch (reg->id) {
2142 case KVM_REG_S390_TODPR:
2143 r = put_user(vcpu->arch.sie_block->todpr,
2144 (u32 __user *)reg->addr);
2145 break;
2146 case KVM_REG_S390_EPOCHDIFF:
2147 r = put_user(vcpu->arch.sie_block->epoch,
2148 (u64 __user *)reg->addr);
2149 break;
2150 case KVM_REG_S390_CPU_TIMER:
2151 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2152 (u64 __user *)reg->addr);
2153 break;
2154 case KVM_REG_S390_CLOCK_COMP:
2155 r = put_user(vcpu->arch.sie_block->ckc,
2156 (u64 __user *)reg->addr);
2157 break;
2158 case KVM_REG_S390_PFTOKEN:
2159 r = put_user(vcpu->arch.pfault_token,
2160 (u64 __user *)reg->addr);
2161 break;
2162 case KVM_REG_S390_PFCOMPARE:
2163 r = put_user(vcpu->arch.pfault_compare,
2164 (u64 __user *)reg->addr);
2165 break;
2166 case KVM_REG_S390_PFSELECT:
2167 r = put_user(vcpu->arch.pfault_select,
2168 (u64 __user *)reg->addr);
2169 break;
2170 case KVM_REG_S390_PP:
2171 r = put_user(vcpu->arch.sie_block->pp,
2172 (u64 __user *)reg->addr);
2173 break;
2174 case KVM_REG_S390_GBEA:
2175 r = put_user(vcpu->arch.sie_block->gbea,
2176 (u64 __user *)reg->addr);
2177 break;
2178 default:
2179 break;
2180 }
2181
2182 return r;
2183 }
2184
2185 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2186 struct kvm_one_reg *reg)
2187 {
2188 int r = -EINVAL;
2189 __u64 val;
2190
2191 switch (reg->id) {
2192 case KVM_REG_S390_TODPR:
2193 r = get_user(vcpu->arch.sie_block->todpr,
2194 (u32 __user *)reg->addr);
2195 break;
2196 case KVM_REG_S390_EPOCHDIFF:
2197 r = get_user(vcpu->arch.sie_block->epoch,
2198 (u64 __user *)reg->addr);
2199 break;
2200 case KVM_REG_S390_CPU_TIMER:
2201 r = get_user(val, (u64 __user *)reg->addr);
2202 if (!r)
2203 kvm_s390_set_cpu_timer(vcpu, val);
2204 break;
2205 case KVM_REG_S390_CLOCK_COMP:
2206 r = get_user(vcpu->arch.sie_block->ckc,
2207 (u64 __user *)reg->addr);
2208 break;
2209 case KVM_REG_S390_PFTOKEN:
2210 r = get_user(vcpu->arch.pfault_token,
2211 (u64 __user *)reg->addr);
2212 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2213 kvm_clear_async_pf_completion_queue(vcpu);
2214 break;
2215 case KVM_REG_S390_PFCOMPARE:
2216 r = get_user(vcpu->arch.pfault_compare,
2217 (u64 __user *)reg->addr);
2218 break;
2219 case KVM_REG_S390_PFSELECT:
2220 r = get_user(vcpu->arch.pfault_select,
2221 (u64 __user *)reg->addr);
2222 break;
2223 case KVM_REG_S390_PP:
2224 r = get_user(vcpu->arch.sie_block->pp,
2225 (u64 __user *)reg->addr);
2226 break;
2227 case KVM_REG_S390_GBEA:
2228 r = get_user(vcpu->arch.sie_block->gbea,
2229 (u64 __user *)reg->addr);
2230 break;
2231 default:
2232 break;
2233 }
2234
2235 return r;
2236 }
2237
/* KVM_S390_INITIAL_RESET: run the vcpu initial reset; always returns 0. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);

	return 0;
}
2243
2244 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2245 {
2246 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2247 return 0;
2248 }
2249
2250 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2251 {
2252 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2253 return 0;
2254 }
2255
2256 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2257 struct kvm_sregs *sregs)
2258 {
2259 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2260 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2261 return 0;
2262 }
2263
2264 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2265 struct kvm_sregs *sregs)
2266 {
2267 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2268 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2269 return 0;
2270 }
2271
2272 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2273 {
2274 if (test_fp_ctl(fpu->fpc))
2275 return -EINVAL;
2276 vcpu->run->s.regs.fpc = fpu->fpc;
2277 if (MACHINE_HAS_VX)
2278 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2279 (freg_t *) fpu->fprs);
2280 else
2281 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2282 return 0;
2283 }
2284
2285 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2286 {
2287 /* make sure we have the latest values */
2288 save_fpu_regs();
2289 if (MACHINE_HAS_VX)
2290 convert_vx_to_fp((freg_t *) fpu->fprs,
2291 (__vector128 *) vcpu->run->s.regs.vrs);
2292 else
2293 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2294 fpu->fpc = vcpu->run->s.regs.fpc;
2295 return 0;
2296 }
2297
2298 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2299 {
2300 int rc = 0;
2301
2302 if (!is_vcpu_stopped(vcpu))
2303 rc = -EBUSY;
2304 else {
2305 vcpu->run->psw_mask = psw.mask;
2306 vcpu->run->psw_addr = psw.addr;
2307 }
2308 return rc;
2309 }
2310
2311 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2312 struct kvm_translation *tr)
2313 {
2314 return -EINVAL; /* not implemented yet */
2315 }
2316
2317 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2318 KVM_GUESTDBG_USE_HW_BP | \
2319 KVM_GUESTDBG_ENABLE)
2320
2321 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2322 struct kvm_guest_debug *dbg)
2323 {
2324 int rc = 0;
2325
2326 vcpu->guest_debug = 0;
2327 kvm_s390_clear_bp_data(vcpu);
2328
2329 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2330 return -EINVAL;
2331 if (!sclp.has_gpere)
2332 return -EINVAL;
2333
2334 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2335 vcpu->guest_debug = dbg->control;
2336 /* enforce guest PER */
2337 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2338
2339 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2340 rc = kvm_s390_import_bp_data(vcpu, dbg);
2341 } else {
2342 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2343 vcpu->arch.guestdbg.last_bp = 0;
2344 }
2345
2346 if (rc) {
2347 vcpu->guest_debug = 0;
2348 kvm_s390_clear_bp_data(vcpu);
2349 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2350 }
2351
2352 return rc;
2353 }
2354
2355 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2356 struct kvm_mp_state *mp_state)
2357 {
2358 /* CHECK_STOP and LOAD are not supported yet */
2359 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2360 KVM_MP_STATE_OPERATING;
2361 }
2362
2363 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2364 struct kvm_mp_state *mp_state)
2365 {
2366 int rc = 0;
2367
2368 /* user space knows about this interface - let it control the state */
2369 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2370
2371 switch (mp_state->mp_state) {
2372 case KVM_MP_STATE_STOPPED:
2373 kvm_s390_vcpu_stop(vcpu);
2374 break;
2375 case KVM_MP_STATE_OPERATING:
2376 kvm_s390_vcpu_start(vcpu);
2377 break;
2378 case KVM_MP_STATE_LOAD:
2379 case KVM_MP_STATE_CHECK_STOP:
2380 /* fall through - CHECK_STOP and LOAD are not supported yet */
2381 default:
2382 rc = -ENXIO;
2383 }
2384
2385 return rc;
2386 }
2387
2388 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2389 {
2390 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2391 }
2392
2393 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2394 {
2395 retry:
2396 kvm_s390_vcpu_request_handled(vcpu);
2397 if (!vcpu->requests)
2398 return 0;
2399 /*
2400 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2401 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2402 * This ensures that the ipte instruction for this request has
2403 * already finished. We might race against a second unmapper that
2404 * wants to set the blocking bit. Lets just retry the request loop.
2405 */
2406 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2407 int rc;
2408 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2409 kvm_s390_get_prefix(vcpu),
2410 PAGE_SIZE * 2, PROT_WRITE);
2411 if (rc) {
2412 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2413 return rc;
2414 }
2415 goto retry;
2416 }
2417
2418 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2419 vcpu->arch.sie_block->ihcpu = 0xffff;
2420 goto retry;
2421 }
2422
2423 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2424 if (!ibs_enabled(vcpu)) {
2425 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2426 atomic_or(CPUSTAT_IBS,
2427 &vcpu->arch.sie_block->cpuflags);
2428 }
2429 goto retry;
2430 }
2431
2432 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2433 if (ibs_enabled(vcpu)) {
2434 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2435 atomic_andnot(CPUSTAT_IBS,
2436 &vcpu->arch.sie_block->cpuflags);
2437 }
2438 goto retry;
2439 }
2440
2441 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2442 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2443 goto retry;
2444 }
2445
2446 /* nothing to do, just clear the request */
2447 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2448
2449 return 0;
2450 }
2451
2452 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2453 {
2454 struct kvm_vcpu *vcpu;
2455 int i;
2456
2457 mutex_lock(&kvm->lock);
2458 preempt_disable();
2459 kvm->arch.epoch = tod - get_tod_clock();
2460 kvm_s390_vcpu_block_all(kvm);
2461 kvm_for_each_vcpu(i, vcpu, kvm)
2462 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2463 kvm_s390_vcpu_unblock_all(kvm);
2464 preempt_enable();
2465 mutex_unlock(&kvm->lock);
2466 }
2467
2468 /**
2469 * kvm_arch_fault_in_page - fault-in guest page if necessary
2470 * @vcpu: The corresponding virtual cpu
2471 * @gpa: Guest physical address
2472 * @writable: Whether the page should be writable or not
2473 *
2474 * Make sure that a guest page has been faulted-in on the host.
2475 *
2476 * Return: Zero on success, negative error code otherwise.
2477 */
2478 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2479 {
2480 return gmap_fault(vcpu->arch.gmap, gpa,
2481 writable ? FAULT_FLAG_WRITE : 0);
2482 }
2483
2484 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2485 unsigned long token)
2486 {
2487 struct kvm_s390_interrupt inti;
2488 struct kvm_s390_irq irq;
2489
2490 if (start_token) {
2491 irq.u.ext.ext_params2 = token;
2492 irq.type = KVM_S390_INT_PFAULT_INIT;
2493 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2494 } else {
2495 inti.type = KVM_S390_INT_PFAULT_DONE;
2496 inti.parm64 = token;
2497 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2498 }
2499 }
2500
2501 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2502 struct kvm_async_pf *work)
2503 {
2504 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2505 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2506 }
2507
2508 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2509 struct kvm_async_pf *work)
2510 {
2511 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2512 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2513 }
2514
/* Intentionally empty hook: nothing is left to do here on s390. */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}
2520
2521 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2522 {
2523 /*
2524 * s390 will always inject the page directly,
2525 * but we still want check_async_completion to cleanup
2526 */
2527 return true;
2528 }
2529
2530 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2531 {
2532 hva_t hva;
2533 struct kvm_arch_async_pf arch;
2534 int rc;
2535
2536 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2537 return 0;
2538 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2539 vcpu->arch.pfault_compare)
2540 return 0;
2541 if (psw_extint_disabled(vcpu))
2542 return 0;
2543 if (kvm_s390_vcpu_has_irq(vcpu, 0))
2544 return 0;
2545 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2546 return 0;
2547 if (!vcpu->arch.gmap->pfault_enabled)
2548 return 0;
2549
2550 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2551 hva += current->thread.gmap_addr & ~PAGE_MASK;
2552 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2553 return 0;
2554
2555 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2556 return rc;
2557 }
2558
2559 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2560 {
2561 int rc, cpuflags;
2562
2563 /*
2564 * On s390 notifications for arriving pages will be delivered directly
2565 * to the guest but the house keeping for completed pfaults is
2566 * handled outside the worker.
2567 */
2568 kvm_check_async_pf_completion(vcpu);
2569
2570 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2571 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2572
2573 if (need_resched())
2574 schedule();
2575
2576 if (test_cpu_flag(CIF_MCCK_PENDING))
2577 s390_handle_mcck();
2578
2579 if (!kvm_is_ucontrol(vcpu->kvm)) {
2580 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2581 if (rc)
2582 return rc;
2583 }
2584
2585 rc = kvm_s390_handle_requests(vcpu);
2586 if (rc)
2587 return rc;
2588
2589 if (guestdbg_enabled(vcpu)) {
2590 kvm_s390_backup_guest_per_regs(vcpu);
2591 kvm_s390_patch_guest_per_regs(vcpu);
2592 }
2593
2594 vcpu->arch.sie_block->icptcode = 0;
2595 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2596 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2597 trace_kvm_s390_sie_enter(vcpu, cpuflags);
2598
2599 return 0;
2600 }
2601
2602 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2603 {
2604 struct kvm_s390_pgm_info pgm_info = {
2605 .code = PGM_ADDRESSING,
2606 };
2607 u8 opcode, ilen;
2608 int rc;
2609
2610 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2611 trace_kvm_s390_sie_fault(vcpu);
2612
2613 /*
2614 * We want to inject an addressing exception, which is defined as a
2615 * suppressing or terminating exception. However, since we came here
2616 * by a DAT access exception, the PSW still points to the faulting
2617 * instruction since DAT exceptions are nullifying. So we've got
2618 * to look up the current opcode to get the length of the instruction
2619 * to be able to forward the PSW.
2620 */
2621 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
2622 ilen = insn_length(opcode);
2623 if (rc < 0) {
2624 return rc;
2625 } else if (rc) {
2626 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2627 * Forward by arbitrary ilc, injection will take care of
2628 * nullification if necessary.
2629 */
2630 pgm_info = vcpu->arch.pgm;
2631 ilen = 4;
2632 }
2633 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2634 kvm_s390_forward_psw(vcpu, ilen);
2635 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2636 }
2637
2638 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2639 {
2640 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2641 vcpu->arch.sie_block->icptcode);
2642 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2643
2644 if (guestdbg_enabled(vcpu))
2645 kvm_s390_restore_guest_per_regs(vcpu);
2646
2647 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2648 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2649
2650 if (vcpu->arch.sie_block->icptcode > 0) {
2651 int rc = kvm_handle_sie_intercept(vcpu);
2652
2653 if (rc != -EOPNOTSUPP)
2654 return rc;
2655 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2656 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2657 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2658 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2659 return -EREMOTE;
2660 } else if (exit_reason != -EFAULT) {
2661 vcpu->stat.exit_null++;
2662 return 0;
2663 } else if (kvm_is_ucontrol(vcpu->kvm)) {
2664 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2665 vcpu->run->s390_ucontrol.trans_exc_code =
2666 current->thread.gmap_addr;
2667 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2668 return -EREMOTE;
2669 } else if (current->thread.gmap_pfault) {
2670 trace_kvm_s390_major_guest_pfault(vcpu);
2671 current->thread.gmap_pfault = 0;
2672 if (kvm_arch_setup_async_pf(vcpu))
2673 return 0;
2674 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2675 }
2676 return vcpu_post_run_fault_in_sie(vcpu);
2677 }
2678
2679 static int __vcpu_run(struct kvm_vcpu *vcpu)
2680 {
2681 int rc, exit_reason;
2682
2683 /*
2684 * We try to hold kvm->srcu during most of vcpu_run (except when run-
2685 * ning the guest), so that memslots (and other stuff) are protected
2686 */
2687 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2688
2689 do {
2690 rc = vcpu_pre_run(vcpu);
2691 if (rc)
2692 break;
2693
2694 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2695 /*
2696 * As PF_VCPU will be used in fault handler, between
2697 * guest_enter and guest_exit should be no uaccess.
2698 */
2699 local_irq_disable();
2700 guest_enter_irqoff();
2701 __disable_cpu_timer_accounting(vcpu);
2702 local_irq_enable();
2703 exit_reason = sie64a(vcpu->arch.sie_block,
2704 vcpu->run->s.regs.gprs);
2705 local_irq_disable();
2706 __enable_cpu_timer_accounting(vcpu);
2707 guest_exit_irqoff();
2708 local_irq_enable();
2709 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2710
2711 rc = vcpu_post_run(vcpu, exit_reason);
2712 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2713
2714 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2715 return rc;
2716 }
2717
2718 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2719 {
2720 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2721 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2722 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2723 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2724 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2725 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2726 /* some control register changes require a tlb flush */
2727 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2728 }
2729 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2730 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2731 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2732 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2733 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2734 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2735 }
2736 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2737 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2738 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2739 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2740 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2741 kvm_clear_async_pf_completion_queue(vcpu);
2742 }
2743 /*
2744 * If userspace sets the riccb (e.g. after migration) to a valid state,
2745 * we should enable RI here instead of doing the lazy enablement.
2746 */
2747 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2748 test_kvm_facility(vcpu->kvm, 64)) {
2749 struct runtime_instr_cb *riccb =
2750 (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2751
2752 if (riccb->valid)
2753 vcpu->arch.sie_block->ecb3 |= 0x01;
2754 }
2755 save_access_regs(vcpu->arch.host_acrs);
2756 restore_access_regs(vcpu->run->s.regs.acrs);
2757 /* save host (userspace) fprs/vrs */
2758 save_fpu_regs();
2759 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
2760 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
2761 if (MACHINE_HAS_VX)
2762 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
2763 else
2764 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
2765 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
2766 if (test_fp_ctl(current->thread.fpu.fpc))
2767 /* User space provided an invalid FPC, let's clear it */
2768 current->thread.fpu.fpc = 0;
2769
2770 kvm_run->kvm_dirty_regs = 0;
2771 }
2772
2773 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2774 {
2775 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2776 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2777 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2778 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2779 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2780 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2781 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2782 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2783 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2784 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2785 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2786 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2787 save_access_regs(vcpu->run->s.regs.acrs);
2788 restore_access_regs(vcpu->arch.host_acrs);
2789 /* Save guest register state */
2790 save_fpu_regs();
2791 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2792 /* Restore will be done lazily at return */
2793 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
2794 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
2795
2796 }
2797
2798 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2799 {
2800 int rc;
2801 sigset_t sigsaved;
2802
2803 if (kvm_run->immediate_exit)
2804 return -EINTR;
2805
2806 if (guestdbg_exit_pending(vcpu)) {
2807 kvm_s390_prepare_debug_exit(vcpu);
2808 return 0;
2809 }
2810
2811 if (vcpu->sigset_active)
2812 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2813
2814 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2815 kvm_s390_vcpu_start(vcpu);
2816 } else if (is_vcpu_stopped(vcpu)) {
2817 pr_err_ratelimited("can't run stopped vcpu %d\n",
2818 vcpu->vcpu_id);
2819 return -EINVAL;
2820 }
2821
2822 sync_regs(vcpu, kvm_run);
2823 enable_cpu_timer_accounting(vcpu);
2824
2825 might_fault();
2826 rc = __vcpu_run(vcpu);
2827
2828 if (signal_pending(current) && !rc) {
2829 kvm_run->exit_reason = KVM_EXIT_INTR;
2830 rc = -EINTR;
2831 }
2832
2833 if (guestdbg_exit_pending(vcpu) && !rc) {
2834 kvm_s390_prepare_debug_exit(vcpu);
2835 rc = 0;
2836 }
2837
2838 if (rc == -EREMOTE) {
2839 /* userspace support is needed, kvm_run has been prepared */
2840 rc = 0;
2841 }
2842
2843 disable_cpu_timer_accounting(vcpu);
2844 store_regs(vcpu, kvm_run);
2845
2846 if (vcpu->sigset_active)
2847 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2848
2849 vcpu->stat.exit_userspace++;
2850 return rc;
2851 }
2852
2853 /*
2854 * store status at address
2855 * we use have two special cases:
2856 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2857 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2858 */
2859 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2860 {
2861 unsigned char archmode = 1;
2862 freg_t fprs[NUM_FPRS];
2863 unsigned int px;
2864 u64 clkcomp, cputm;
2865 int rc;
2866
2867 px = kvm_s390_get_prefix(vcpu);
2868 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2869 if (write_guest_abs(vcpu, 163, &archmode, 1))
2870 return -EFAULT;
2871 gpa = 0;
2872 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2873 if (write_guest_real(vcpu, 163, &archmode, 1))
2874 return -EFAULT;
2875 gpa = px;
2876 } else
2877 gpa -= __LC_FPREGS_SAVE_AREA;
2878
2879 /* manually convert vector registers if necessary */
2880 if (MACHINE_HAS_VX) {
2881 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2882 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2883 fprs, 128);
2884 } else {
2885 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2886 vcpu->run->s.regs.fprs, 128);
2887 }
2888 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2889 vcpu->run->s.regs.gprs, 128);
2890 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2891 &vcpu->arch.sie_block->gpsw, 16);
2892 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2893 &px, 4);
2894 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2895 &vcpu->run->s.regs.fpc, 4);
2896 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2897 &vcpu->arch.sie_block->todpr, 4);
2898 cputm = kvm_s390_get_cpu_timer(vcpu);
2899 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2900 &cputm, 8);
2901 clkcomp = vcpu->arch.sie_block->ckc >> 8;
2902 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2903 &clkcomp, 8);
2904 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2905 &vcpu->run->s.regs.acrs, 64);
2906 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2907 &vcpu->arch.sie_block->gcr, 128);
2908 return rc ? -EFAULT : 0;
2909 }
2910
2911 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2912 {
2913 /*
2914 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2915 * switch in the run ioctl. Let's update our copies before we save
2916 * it into the save area
2917 */
2918 save_fpu_regs();
2919 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2920 save_access_regs(vcpu->run->s.regs.acrs);
2921
2922 return kvm_s390_store_status_unloaded(vcpu, addr);
2923 }
2924
2925 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2926 {
2927 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2928 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2929 }
2930
2931 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2932 {
2933 unsigned int i;
2934 struct kvm_vcpu *vcpu;
2935
2936 kvm_for_each_vcpu(i, vcpu, kvm) {
2937 __disable_ibs_on_vcpu(vcpu);
2938 }
2939 }
2940
2941 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2942 {
2943 if (!sclp.has_ibs)
2944 return;
2945 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2946 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2947 }
2948
2949 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2950 {
2951 int i, online_vcpus, started_vcpus = 0;
2952
2953 if (!is_vcpu_stopped(vcpu))
2954 return;
2955
2956 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2957 /* Only one cpu at a time may enter/leave the STOPPED state. */
2958 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2959 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2960
2961 for (i = 0; i < online_vcpus; i++) {
2962 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2963 started_vcpus++;
2964 }
2965
2966 if (started_vcpus == 0) {
2967 /* we're the only active VCPU -> speed it up */
2968 __enable_ibs_on_vcpu(vcpu);
2969 } else if (started_vcpus == 1) {
2970 /*
2971 * As we are starting a second VCPU, we have to disable
2972 * the IBS facility on all VCPUs to remove potentially
2973 * oustanding ENABLE requests.
2974 */
2975 __disable_ibs_on_all_vcpus(vcpu->kvm);
2976 }
2977
2978 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2979 /*
2980 * Another VCPU might have used IBS while we were offline.
2981 * Let's play safe and flush the VCPU at startup.
2982 */
2983 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2984 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2985 return;
2986 }
2987
2988 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2989 {
2990 int i, online_vcpus, started_vcpus = 0;
2991 struct kvm_vcpu *started_vcpu = NULL;
2992
2993 if (is_vcpu_stopped(vcpu))
2994 return;
2995
2996 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2997 /* Only one cpu at a time may enter/leave the STOPPED state. */
2998 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2999 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3000
3001 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3002 kvm_s390_clear_stop_irq(vcpu);
3003
3004 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3005 __disable_ibs_on_vcpu(vcpu);
3006
3007 for (i = 0; i < online_vcpus; i++) {
3008 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3009 started_vcpus++;
3010 started_vcpu = vcpu->kvm->vcpus[i];
3011 }
3012 }
3013
3014 if (started_vcpus == 1) {
3015 /*
3016 * As we only have one VCPU left, we want to enable the
3017 * IBS facility for that VCPU to speed it up.
3018 */
3019 __enable_ibs_on_vcpu(started_vcpu);
3020 }
3021
3022 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3023 return;
3024 }
3025
3026 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3027 struct kvm_enable_cap *cap)
3028 {
3029 int r;
3030
3031 if (cap->flags)
3032 return -EINVAL;
3033
3034 switch (cap->cap) {
3035 case KVM_CAP_S390_CSS_SUPPORT:
3036 if (!vcpu->kvm->arch.css_support) {
3037 vcpu->kvm->arch.css_support = 1;
3038 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3039 trace_kvm_s390_enable_css(vcpu->kvm);
3040 }
3041 r = 0;
3042 break;
3043 default:
3044 r = -EINVAL;
3045 break;
3046 }
3047 return r;
3048 }
3049
3050 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3051 struct kvm_s390_mem_op *mop)
3052 {
3053 void __user *uaddr = (void __user *)mop->buf;
3054 void *tmpbuf = NULL;
3055 int r, srcu_idx;
3056 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3057 | KVM_S390_MEMOP_F_CHECK_ONLY;
3058
3059 if (mop->flags & ~supported_flags)
3060 return -EINVAL;
3061
3062 if (mop->size > MEM_OP_MAX_SIZE)
3063 return -E2BIG;
3064
3065 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3066 tmpbuf = vmalloc(mop->size);
3067 if (!tmpbuf)
3068 return -ENOMEM;
3069 }
3070
3071 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3072
3073 switch (mop->op) {
3074 case KVM_S390_MEMOP_LOGICAL_READ:
3075 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3076 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3077 mop->size, GACC_FETCH);
3078 break;
3079 }
3080 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3081 if (r == 0) {
3082 if (copy_to_user(uaddr, tmpbuf, mop->size))
3083 r = -EFAULT;
3084 }
3085 break;
3086 case KVM_S390_MEMOP_LOGICAL_WRITE:
3087 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3088 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3089 mop->size, GACC_STORE);
3090 break;
3091 }
3092 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3093 r = -EFAULT;
3094 break;
3095 }
3096 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3097 break;
3098 default:
3099 r = -EINVAL;
3100 }
3101
3102 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3103
3104 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3105 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3106
3107 vfree(tmpbuf);
3108 return r;
3109 }
3110
3111 long kvm_arch_vcpu_ioctl(struct file *filp,
3112 unsigned int ioctl, unsigned long arg)
3113 {
3114 struct kvm_vcpu *vcpu = filp->private_data;
3115 void __user *argp = (void __user *)arg;
3116 int idx;
3117 long r;
3118
3119 switch (ioctl) {
3120 case KVM_S390_IRQ: {
3121 struct kvm_s390_irq s390irq;
3122
3123 r = -EFAULT;
3124 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3125 break;
3126 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3127 break;
3128 }
3129 case KVM_S390_INTERRUPT: {
3130 struct kvm_s390_interrupt s390int;
3131 struct kvm_s390_irq s390irq;
3132
3133 r = -EFAULT;
3134 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3135 break;
3136 if (s390int_to_s390irq(&s390int, &s390irq))
3137 return -EINVAL;
3138 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3139 break;
3140 }
3141 case KVM_S390_STORE_STATUS:
3142 idx = srcu_read_lock(&vcpu->kvm->srcu);
3143 r = kvm_s390_vcpu_store_status(vcpu, arg);
3144 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3145 break;
3146 case KVM_S390_SET_INITIAL_PSW: {
3147 psw_t psw;
3148
3149 r = -EFAULT;
3150 if (copy_from_user(&psw, argp, sizeof(psw)))
3151 break;
3152 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3153 break;
3154 }
3155 case KVM_S390_INITIAL_RESET:
3156 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3157 break;
3158 case KVM_SET_ONE_REG:
3159 case KVM_GET_ONE_REG: {
3160 struct kvm_one_reg reg;
3161 r = -EFAULT;
3162 if (copy_from_user(&reg, argp, sizeof(reg)))
3163 break;
3164 if (ioctl == KVM_SET_ONE_REG)
3165 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3166 else
3167 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3168 break;
3169 }
3170 #ifdef CONFIG_KVM_S390_UCONTROL
3171 case KVM_S390_UCAS_MAP: {
3172 struct kvm_s390_ucas_mapping ucasmap;
3173
3174 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3175 r = -EFAULT;
3176 break;
3177 }
3178
3179 if (!kvm_is_ucontrol(vcpu->kvm)) {
3180 r = -EINVAL;
3181 break;
3182 }
3183
3184 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3185 ucasmap.vcpu_addr, ucasmap.length);
3186 break;
3187 }
3188 case KVM_S390_UCAS_UNMAP: {
3189 struct kvm_s390_ucas_mapping ucasmap;
3190
3191 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3192 r = -EFAULT;
3193 break;
3194 }
3195
3196 if (!kvm_is_ucontrol(vcpu->kvm)) {
3197 r = -EINVAL;
3198 break;
3199 }
3200
3201 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3202 ucasmap.length);
3203 break;
3204 }
3205 #endif
3206 case KVM_S390_VCPU_FAULT: {
3207 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3208 break;
3209 }
3210 case KVM_ENABLE_CAP:
3211 {
3212 struct kvm_enable_cap cap;
3213 r = -EFAULT;
3214 if (copy_from_user(&cap, argp, sizeof(cap)))
3215 break;
3216 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3217 break;
3218 }
3219 case KVM_S390_MEM_OP: {
3220 struct kvm_s390_mem_op mem_op;
3221
3222 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3223 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3224 else
3225 r = -EFAULT;
3226 break;
3227 }
3228 case KVM_S390_SET_IRQ_STATE: {
3229 struct kvm_s390_irq_state irq_state;
3230
3231 r = -EFAULT;
3232 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3233 break;
3234 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3235 irq_state.len == 0 ||
3236 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3237 r = -EINVAL;
3238 break;
3239 }
3240 r = kvm_s390_set_irq_state(vcpu,
3241 (void __user *) irq_state.buf,
3242 irq_state.len);
3243 break;
3244 }
3245 case KVM_S390_GET_IRQ_STATE: {
3246 struct kvm_s390_irq_state irq_state;
3247
3248 r = -EFAULT;
3249 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3250 break;
3251 if (irq_state.len == 0) {
3252 r = -EINVAL;
3253 break;
3254 }
3255 r = kvm_s390_get_irq_state(vcpu,
3256 (__u8 __user *) irq_state.buf,
3257 irq_state.len);
3258 break;
3259 }
3260 default:
3261 r = -ENOTTY;
3262 }
3263 return r;
3264 }
3265
3266 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3267 {
3268 #ifdef CONFIG_KVM_S390_UCONTROL
3269 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3270 && (kvm_is_ucontrol(vcpu->kvm))) {
3271 vmf->page = virt_to_page(vcpu->arch.sie_block);
3272 get_page(vmf->page);
3273 return 0;
3274 }
3275 #endif
3276 return VM_FAULT_SIGBUS;
3277 }
3278
/*
 * Architecture hook invoked when a memory slot is created.
 *
 * This implementation does no work and ignores all of its arguments.
 *
 * Returns 0 unconditionally.
 */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
3284
3285 /* Section: memory related */
3286 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3287 struct kvm_memory_slot *memslot,
3288 const struct kvm_userspace_memory_region *mem,
3289 enum kvm_mr_change change)
3290 {
3291 /* A few sanity checks. We can have memory slots which have to be
3292 located/ended at a segment boundary (1MB). The memory in userland is
3293 ok to be fragmented into various different vmas. It is okay to mmap()
3294 and munmap() stuff in this slot after doing this call at any time */
3295
3296 if (mem->userspace_addr & 0xffffful)
3297 return -EINVAL;
3298
3299 if (mem->memory_size & 0xffffful)
3300 return -EINVAL;
3301
3302 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3303 return -EINVAL;
3304
3305 return 0;
3306 }
3307
3308 void kvm_arch_commit_memory_region(struct kvm *kvm,
3309 const struct kvm_userspace_memory_region *mem,
3310 const struct kvm_memory_slot *old,
3311 const struct kvm_memory_slot *new,
3312 enum kvm_mr_change change)
3313 {
3314 int rc;
3315
3316 /* If the basics of the memslot do not change, we do not want
3317 * to update the gmap. Every update causes several unnecessary
3318 * segment translation exceptions. This is usually handled just
3319 * fine by the normal fault handler + gmap, but it will also
3320 * cause faults on the prefix page of running guest CPUs.
3321 */
3322 if (old->userspace_addr == mem->userspace_addr &&
3323 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3324 old->npages * PAGE_SIZE == mem->memory_size)
3325 return;
3326
3327 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3328 mem->guest_phys_addr, mem->memory_size);
3329 if (rc)
3330 pr_warn("failed to commit memory region\n");
3331 return;
3332 }
3333
3334 static inline unsigned long nonhyp_mask(int i)
3335 {
3336 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3337
3338 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3339 }
3340
3341 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3342 {
3343 vcpu->valid_wakeup = false;
3344 }
3345
3346 static int __init kvm_s390_init(void)
3347 {
3348 int i;
3349
3350 if (!sclp.has_sief2) {
3351 pr_info("SIE not available\n");
3352 return -ENODEV;
3353 }
3354
3355 for (i = 0; i < 16; i++)
3356 kvm_s390_fac_list_mask[i] |=
3357 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3358
3359 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3360 }
3361
3362 static void __exit kvm_s390_exit(void)
3363 {
3364 kvm_exit();
3365 }
3366
3367 module_init(kvm_s390_init);
3368 module_exit(kvm_s390_exit);
3369
3370 /*
3371 * Enable autoloading of the kvm module.
3372 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3373 * since x86 takes a different approach.
3374 */
3375 #include <linux/miscdevice.h>
3376 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3377 MODULE_ALIAS("devname:kvm");