]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - arch/s390/kvm/kvm-s390.c
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
[mirror_ubuntu-artful-kernel.git] / arch / s390 / kvm / kvm-s390.c
1 /*
2 * hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008, 2009
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
15 */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
34 #include <asm/stp.h>
35 #include <asm/pgtable.h>
36 #include <asm/gmap.h>
37 #include <asm/nmi.h>
38 #include <asm/switch_to.h>
39 #include <asm/isc.h>
40 #include <asm/sclp.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
43 #include "kvm-s390.h"
44 #include "gaccess.h"
45
46 #define KMSG_COMPONENT "kvm-s390"
47 #undef pr_fmt
48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
49
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
/* NOTE(review): presumably the number of local interrupt slots per VCPU - confirm */
#define LOCAL_IRQS 32
/* Size in bytes of a buffer holding (KVM_MAX_VCPUS + LOCAL_IRQS) struct kvm_s390_irq */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

/*
 * Expands to two comma separated initializers: the offset of stat.x within
 * struct kvm_vcpu and the KVM_STAT_VCPU kind. Used to fill debugfs_entries.
 */
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60
/*
 * Table of per-VCPU statistics counters. Every entry pairs a name string
 * with the location of the matching counter inside struct kvm_vcpu (see
 * VCPU_STAT). The table is terminated by a { NULL } entry.
 * NOTE(review): presumably consumed by the generic KVM debugfs code; the
 * consumer is not visible in this file section.
 */
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }	/* end-of-table marker */
};
129
/*
 * allow nested virtualization in KVM (if enabled by user space)
 *
 * Exposed as a module parameter with S_IRUGO permissions. The value is
 * checked in kvm_s390_cpu_feat_init(): when it is 0, none of the features
 * following the SIEF2 check there are made available.
 */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");
134
135 /* upper facilities limit for kvm */
136 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
137
138 unsigned long kvm_s390_fac_list_mask_size(void)
139 {
140 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
141 return ARRAY_SIZE(kvm_s390_fac_list_mask);
142 }
143
/* available cpu features supported by kvm; filled by kvm_s390_cpu_feat_init() */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* gmap pte notifiers, registered in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
/* s390 debug feature handle, registered as "kvm-trace" in kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
152
153 /* Section: not file related */
/*
 * Nothing needs to be switched on: every s390 is virtualization
 * enabled ;-) so this unconditionally reports success (0).
 */
int kvm_arch_hardware_enable(void)
{
	return 0;
}
159
160 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
161 unsigned long end);
162
163 /*
164 * This callback is executed during stop_machine(). All CPUs are therefore
165 * temporarily stopped. In order not to change guest behavior, we have to
166 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
167 * so a CPU won't be stopped while calculating with the epoch.
168 */
169 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
170 void *v)
171 {
172 struct kvm *kvm;
173 struct kvm_vcpu *vcpu;
174 int i;
175 unsigned long long *delta = v;
176
177 list_for_each_entry(kvm, &vm_list, vm_list) {
178 kvm->arch.epoch -= *delta;
179 kvm_for_each_vcpu(i, vcpu, kvm) {
180 vcpu->arch.sie_block->epoch -= *delta;
181 if (vcpu->arch.cputm_enabled)
182 vcpu->arch.cputm_start += *delta;
183 if (vcpu->arch.vsie_block)
184 vcpu->arch.vsie_block->epoch -= *delta;
185 }
186 }
187 return NOTIFY_OK;
188 }
189
/*
 * Notifier block that routes epoch delta notifications to kvm_clock_sync().
 * It is added to s390_epoch_delta_notifier in kvm_arch_hardware_setup() and
 * removed again in kvm_arch_hardware_unsetup().
 */
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
193
194 int kvm_arch_hardware_setup(void)
195 {
196 gmap_notifier.notifier_call = kvm_gmap_notifier;
197 gmap_register_pte_notifier(&gmap_notifier);
198 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
199 gmap_register_pte_notifier(&vsie_gmap_notifier);
200 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
201 &kvm_clock_notifier);
202 return 0;
203 }
204
205 void kvm_arch_hardware_unsetup(void)
206 {
207 gmap_unregister_pte_notifier(&gmap_notifier);
208 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
209 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
210 &kvm_clock_notifier);
211 }
212
213 static void allow_cpu_feat(unsigned long nr)
214 {
215 set_bit_inv(nr, kvm_s390_available_cpu_feat);
216 }
217
/*
 * Query whether PLO function code @nr is available.
 *
 * @nr or'ed with 0x100 is placed in general register 0 and the plo
 * instruction is executed in its "test bit" form, for which the parameter
 * registers are ignored. The resulting condition code is then extracted
 * with ipm and shifted down with srl.
 *
 * Returns 1 if the condition code is 0, otherwise 0.
 */
static inline int plo_test_bit(unsigned char nr)
{
	/* the variable is bound to general register 0 */
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	/*
	 * NOTE(review): cc is an output-only ("=d") operand of the asm below,
	 * so this initial value looks like it is always overwritten - confirm.
	 */
	int cc = 3; /* subfunction not available */

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		" plo 0,0,0,0(0)\n"
		" ipm %0\n"
		" srl %0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
233
234 static void kvm_s390_cpu_feat_init(void)
235 {
236 int i;
237
238 for (i = 0; i < 256; ++i) {
239 if (plo_test_bit(i))
240 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
241 }
242
243 if (test_facility(28)) /* TOD-clock steering */
244 ptff(kvm_s390_available_subfunc.ptff,
245 sizeof(kvm_s390_available_subfunc.ptff),
246 PTFF_QAF);
247
248 if (test_facility(17)) { /* MSA */
249 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
250 kvm_s390_available_subfunc.kmac);
251 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
252 kvm_s390_available_subfunc.kmc);
253 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
254 kvm_s390_available_subfunc.km);
255 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
256 kvm_s390_available_subfunc.kimd);
257 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
258 kvm_s390_available_subfunc.klmd);
259 }
260 if (test_facility(76)) /* MSA3 */
261 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
262 kvm_s390_available_subfunc.pckmo);
263 if (test_facility(77)) { /* MSA4 */
264 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
265 kvm_s390_available_subfunc.kmctr);
266 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
267 kvm_s390_available_subfunc.kmf);
268 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
269 kvm_s390_available_subfunc.kmo);
270 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
271 kvm_s390_available_subfunc.pcc);
272 }
273 if (test_facility(57)) /* MSA5 */
274 __cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
275 kvm_s390_available_subfunc.ppno);
276
277 if (MACHINE_HAS_ESOP)
278 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
279 /*
280 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
281 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
282 */
283 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
284 !test_facility(3) || !nested)
285 return;
286 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
287 if (sclp.has_64bscao)
288 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
289 if (sclp.has_siif)
290 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
291 if (sclp.has_gpere)
292 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
293 if (sclp.has_gsls)
294 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
295 if (sclp.has_ib)
296 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
297 if (sclp.has_cei)
298 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
299 if (sclp.has_ibs)
300 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
301 /*
302 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
303 * all skey handling functions read/set the skey from the PGSTE
304 * instead of the real storage key.
305 *
306 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
307 * pages being detected as preserved although they are resident.
308 *
309 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
310 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
311 *
312 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
313 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
314 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
315 *
316 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
317 * cannot easily shadow the SCA because of the ipte lock.
318 */
319 }
320
321 int kvm_arch_init(void *opaque)
322 {
323 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
324 if (!kvm_s390_dbf)
325 return -ENOMEM;
326
327 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
328 debug_unregister(kvm_s390_dbf);
329 return -ENOMEM;
330 }
331
332 kvm_s390_cpu_feat_init();
333
334 /* Register floating interrupt controller interface. */
335 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
336 }
337
338 void kvm_arch_exit(void)
339 {
340 debug_unregister(kvm_s390_dbf);
341 }
342
343 /* Section: device related */
344 long kvm_arch_dev_ioctl(struct file *filp,
345 unsigned int ioctl, unsigned long arg)
346 {
347 if (ioctl == KVM_S390_ENABLE_SIE)
348 return s390_enable_sie();
349 return -EINVAL;
350 }
351
352 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
353 {
354 int r;
355
356 switch (ext) {
357 case KVM_CAP_S390_PSW:
358 case KVM_CAP_S390_GMAP:
359 case KVM_CAP_SYNC_MMU:
360 #ifdef CONFIG_KVM_S390_UCONTROL
361 case KVM_CAP_S390_UCONTROL:
362 #endif
363 case KVM_CAP_ASYNC_PF:
364 case KVM_CAP_SYNC_REGS:
365 case KVM_CAP_ONE_REG:
366 case KVM_CAP_ENABLE_CAP:
367 case KVM_CAP_S390_CSS_SUPPORT:
368 case KVM_CAP_IOEVENTFD:
369 case KVM_CAP_DEVICE_CTRL:
370 case KVM_CAP_ENABLE_CAP_VM:
371 case KVM_CAP_S390_IRQCHIP:
372 case KVM_CAP_VM_ATTRIBUTES:
373 case KVM_CAP_MP_STATE:
374 case KVM_CAP_S390_INJECT_IRQ:
375 case KVM_CAP_S390_USER_SIGP:
376 case KVM_CAP_S390_USER_STSI:
377 case KVM_CAP_S390_SKEYS:
378 case KVM_CAP_S390_IRQ_STATE:
379 case KVM_CAP_S390_USER_INSTR0:
380 r = 1;
381 break;
382 case KVM_CAP_S390_MEM_OP:
383 r = MEM_OP_MAX_SIZE;
384 break;
385 case KVM_CAP_NR_VCPUS:
386 case KVM_CAP_MAX_VCPUS:
387 r = KVM_S390_BSCA_CPU_SLOTS;
388 if (!kvm_s390_use_sca_entries())
389 r = KVM_MAX_VCPUS;
390 else if (sclp.has_esca && sclp.has_64bscao)
391 r = KVM_S390_ESCA_CPU_SLOTS;
392 break;
393 case KVM_CAP_NR_MEMSLOTS:
394 r = KVM_USER_MEM_SLOTS;
395 break;
396 case KVM_CAP_S390_COW:
397 r = MACHINE_HAS_ESOP;
398 break;
399 case KVM_CAP_S390_VECTOR_REGISTERS:
400 r = MACHINE_HAS_VX;
401 break;
402 case KVM_CAP_S390_RI:
403 r = test_facility(64);
404 break;
405 default:
406 r = 0;
407 }
408 return r;
409 }
410
411 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
412 struct kvm_memory_slot *memslot)
413 {
414 gfn_t cur_gfn, last_gfn;
415 unsigned long address;
416 struct gmap *gmap = kvm->arch.gmap;
417
418 /* Loop over all guest pages */
419 last_gfn = memslot->base_gfn + memslot->npages;
420 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
421 address = gfn_to_hva_memslot(memslot, cur_gfn);
422
423 if (test_and_clear_guest_dirty(gmap->mm, address))
424 mark_page_dirty(kvm, cur_gfn);
425 if (fatal_signal_pending(current))
426 return;
427 cond_resched();
428 }
429 }
430
431 /* Section: vm related */
432 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
433
434 /*
435 * Get (and clear) the dirty memory log for a memory slot.
436 */
437 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
438 struct kvm_dirty_log *log)
439 {
440 int r;
441 unsigned long n;
442 struct kvm_memslots *slots;
443 struct kvm_memory_slot *memslot;
444 int is_dirty = 0;
445
446 mutex_lock(&kvm->slots_lock);
447
448 r = -EINVAL;
449 if (log->slot >= KVM_USER_MEM_SLOTS)
450 goto out;
451
452 slots = kvm_memslots(kvm);
453 memslot = id_to_memslot(slots, log->slot);
454 r = -ENOENT;
455 if (!memslot->dirty_bitmap)
456 goto out;
457
458 kvm_s390_sync_dirty_log(kvm, memslot);
459 r = kvm_get_dirty_log(kvm, log, &is_dirty);
460 if (r)
461 goto out;
462
463 /* Clear the dirty log */
464 if (is_dirty) {
465 n = kvm_dirty_bitmap_bytes(memslot);
466 memset(memslot->dirty_bitmap, 0, n);
467 }
468 r = 0;
469 out:
470 mutex_unlock(&kvm->slots_lock);
471 return r;
472 }
473
474 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
475 {
476 unsigned int i;
477 struct kvm_vcpu *vcpu;
478
479 kvm_for_each_vcpu(i, vcpu, kvm) {
480 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
481 }
482 }
483
484 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
485 {
486 int r;
487
488 if (cap->flags)
489 return -EINVAL;
490
491 switch (cap->cap) {
492 case KVM_CAP_S390_IRQCHIP:
493 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
494 kvm->arch.use_irqchip = 1;
495 r = 0;
496 break;
497 case KVM_CAP_S390_USER_SIGP:
498 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
499 kvm->arch.user_sigp = 1;
500 r = 0;
501 break;
502 case KVM_CAP_S390_VECTOR_REGISTERS:
503 mutex_lock(&kvm->lock);
504 if (kvm->created_vcpus) {
505 r = -EBUSY;
506 } else if (MACHINE_HAS_VX) {
507 set_kvm_facility(kvm->arch.model.fac_mask, 129);
508 set_kvm_facility(kvm->arch.model.fac_list, 129);
509 r = 0;
510 } else
511 r = -EINVAL;
512 mutex_unlock(&kvm->lock);
513 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
514 r ? "(not available)" : "(success)");
515 break;
516 case KVM_CAP_S390_RI:
517 r = -EINVAL;
518 mutex_lock(&kvm->lock);
519 if (kvm->created_vcpus) {
520 r = -EBUSY;
521 } else if (test_facility(64)) {
522 set_kvm_facility(kvm->arch.model.fac_mask, 64);
523 set_kvm_facility(kvm->arch.model.fac_list, 64);
524 r = 0;
525 }
526 mutex_unlock(&kvm->lock);
527 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
528 r ? "(not available)" : "(success)");
529 break;
530 case KVM_CAP_S390_USER_STSI:
531 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
532 kvm->arch.user_stsi = 1;
533 r = 0;
534 break;
535 case KVM_CAP_S390_USER_INSTR0:
536 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
537 kvm->arch.user_instr0 = 1;
538 icpt_operexc_on_all_vcpus(kvm);
539 r = 0;
540 break;
541 default:
542 r = -EINVAL;
543 break;
544 }
545 return r;
546 }
547
548 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
549 {
550 int ret;
551
552 switch (attr->attr) {
553 case KVM_S390_VM_MEM_LIMIT_SIZE:
554 ret = 0;
555 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
556 kvm->arch.mem_limit);
557 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
558 ret = -EFAULT;
559 break;
560 default:
561 ret = -ENXIO;
562 break;
563 }
564 return ret;
565 }
566
567 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
568 {
569 int ret;
570 unsigned int idx;
571 switch (attr->attr) {
572 case KVM_S390_VM_MEM_ENABLE_CMMA:
573 ret = -ENXIO;
574 if (!sclp.has_cmma)
575 break;
576
577 ret = -EBUSY;
578 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
579 mutex_lock(&kvm->lock);
580 if (!kvm->created_vcpus) {
581 kvm->arch.use_cmma = 1;
582 ret = 0;
583 }
584 mutex_unlock(&kvm->lock);
585 break;
586 case KVM_S390_VM_MEM_CLR_CMMA:
587 ret = -ENXIO;
588 if (!sclp.has_cmma)
589 break;
590 ret = -EINVAL;
591 if (!kvm->arch.use_cmma)
592 break;
593
594 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
595 mutex_lock(&kvm->lock);
596 idx = srcu_read_lock(&kvm->srcu);
597 s390_reset_cmma(kvm->arch.gmap->mm);
598 srcu_read_unlock(&kvm->srcu, idx);
599 mutex_unlock(&kvm->lock);
600 ret = 0;
601 break;
602 case KVM_S390_VM_MEM_LIMIT_SIZE: {
603 unsigned long new_limit;
604
605 if (kvm_is_ucontrol(kvm))
606 return -EINVAL;
607
608 if (get_user(new_limit, (u64 __user *)attr->addr))
609 return -EFAULT;
610
611 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
612 new_limit > kvm->arch.mem_limit)
613 return -E2BIG;
614
615 if (!new_limit)
616 return -EINVAL;
617
618 /* gmap_create takes last usable address */
619 if (new_limit != KVM_S390_NO_MEM_LIMIT)
620 new_limit -= 1;
621
622 ret = -EBUSY;
623 mutex_lock(&kvm->lock);
624 if (!kvm->created_vcpus) {
625 /* gmap_create will round the limit up */
626 struct gmap *new = gmap_create(current->mm, new_limit);
627
628 if (!new) {
629 ret = -ENOMEM;
630 } else {
631 gmap_remove(kvm->arch.gmap);
632 new->private = kvm;
633 kvm->arch.gmap = new;
634 ret = 0;
635 }
636 }
637 mutex_unlock(&kvm->lock);
638 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
639 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
640 (void *) kvm->arch.gmap->asce);
641 break;
642 }
643 default:
644 ret = -ENXIO;
645 break;
646 }
647 return ret;
648 }
649
650 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
651
652 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
653 {
654 struct kvm_vcpu *vcpu;
655 int i;
656
657 if (!test_kvm_facility(kvm, 76))
658 return -EINVAL;
659
660 mutex_lock(&kvm->lock);
661 switch (attr->attr) {
662 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
663 get_random_bytes(
664 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
665 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
666 kvm->arch.crypto.aes_kw = 1;
667 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
668 break;
669 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
670 get_random_bytes(
671 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
672 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
673 kvm->arch.crypto.dea_kw = 1;
674 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
675 break;
676 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
677 kvm->arch.crypto.aes_kw = 0;
678 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
679 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
680 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
681 break;
682 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
683 kvm->arch.crypto.dea_kw = 0;
684 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
685 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
686 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
687 break;
688 default:
689 mutex_unlock(&kvm->lock);
690 return -ENXIO;
691 }
692
693 kvm_for_each_vcpu(i, vcpu, kvm) {
694 kvm_s390_vcpu_crypto_setup(vcpu);
695 exit_sie(vcpu);
696 }
697 mutex_unlock(&kvm->lock);
698 return 0;
699 }
700
701 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
702 {
703 u8 gtod_high;
704
705 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
706 sizeof(gtod_high)))
707 return -EFAULT;
708
709 if (gtod_high != 0)
710 return -EINVAL;
711 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
712
713 return 0;
714 }
715
716 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
717 {
718 u64 gtod;
719
720 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
721 return -EFAULT;
722
723 kvm_s390_set_tod_clock(kvm, gtod);
724 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
725 return 0;
726 }
727
728 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
729 {
730 int ret;
731
732 if (attr->flags)
733 return -EINVAL;
734
735 switch (attr->attr) {
736 case KVM_S390_VM_TOD_HIGH:
737 ret = kvm_s390_set_tod_high(kvm, attr);
738 break;
739 case KVM_S390_VM_TOD_LOW:
740 ret = kvm_s390_set_tod_low(kvm, attr);
741 break;
742 default:
743 ret = -ENXIO;
744 break;
745 }
746 return ret;
747 }
748
749 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
750 {
751 u8 gtod_high = 0;
752
753 if (copy_to_user((void __user *)attr->addr, &gtod_high,
754 sizeof(gtod_high)))
755 return -EFAULT;
756 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
757
758 return 0;
759 }
760
761 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
762 {
763 u64 gtod;
764
765 gtod = kvm_s390_get_tod_clock_fast(kvm);
766 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
767 return -EFAULT;
768 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
769
770 return 0;
771 }
772
773 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
774 {
775 int ret;
776
777 if (attr->flags)
778 return -EINVAL;
779
780 switch (attr->attr) {
781 case KVM_S390_VM_TOD_HIGH:
782 ret = kvm_s390_get_tod_high(kvm, attr);
783 break;
784 case KVM_S390_VM_TOD_LOW:
785 ret = kvm_s390_get_tod_low(kvm, attr);
786 break;
787 default:
788 ret = -ENXIO;
789 break;
790 }
791 return ret;
792 }
793
794 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
795 {
796 struct kvm_s390_vm_cpu_processor *proc;
797 u16 lowest_ibc, unblocked_ibc;
798 int ret = 0;
799
800 mutex_lock(&kvm->lock);
801 if (kvm->created_vcpus) {
802 ret = -EBUSY;
803 goto out;
804 }
805 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
806 if (!proc) {
807 ret = -ENOMEM;
808 goto out;
809 }
810 if (!copy_from_user(proc, (void __user *)attr->addr,
811 sizeof(*proc))) {
812 kvm->arch.model.cpuid = proc->cpuid;
813 lowest_ibc = sclp.ibc >> 16 & 0xfff;
814 unblocked_ibc = sclp.ibc & 0xfff;
815 if (lowest_ibc && proc->ibc) {
816 if (proc->ibc > unblocked_ibc)
817 kvm->arch.model.ibc = unblocked_ibc;
818 else if (proc->ibc < lowest_ibc)
819 kvm->arch.model.ibc = lowest_ibc;
820 else
821 kvm->arch.model.ibc = proc->ibc;
822 }
823 memcpy(kvm->arch.model.fac_list, proc->fac_list,
824 S390_ARCH_FAC_LIST_SIZE_BYTE);
825 } else
826 ret = -EFAULT;
827 kfree(proc);
828 out:
829 mutex_unlock(&kvm->lock);
830 return ret;
831 }
832
833 static int kvm_s390_set_processor_feat(struct kvm *kvm,
834 struct kvm_device_attr *attr)
835 {
836 struct kvm_s390_vm_cpu_feat data;
837 int ret = -EBUSY;
838
839 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
840 return -EFAULT;
841 if (!bitmap_subset((unsigned long *) data.feat,
842 kvm_s390_available_cpu_feat,
843 KVM_S390_VM_CPU_FEAT_NR_BITS))
844 return -EINVAL;
845
846 mutex_lock(&kvm->lock);
847 if (!atomic_read(&kvm->online_vcpus)) {
848 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
849 KVM_S390_VM_CPU_FEAT_NR_BITS);
850 ret = 0;
851 }
852 mutex_unlock(&kvm->lock);
853 return ret;
854 }
855
/*
 * Configuring subfunctions is not supported yet, so this always fails with
 * -ENXIO. Once supported by kernel + hw, the subfunctions have to be stored
 * in kvm->arch, together with the fact that user space configured them.
 */
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	return -ENXIO;
}
865
866 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
867 {
868 int ret = -ENXIO;
869
870 switch (attr->attr) {
871 case KVM_S390_VM_CPU_PROCESSOR:
872 ret = kvm_s390_set_processor(kvm, attr);
873 break;
874 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
875 ret = kvm_s390_set_processor_feat(kvm, attr);
876 break;
877 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
878 ret = kvm_s390_set_processor_subfunc(kvm, attr);
879 break;
880 }
881 return ret;
882 }
883
884 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
885 {
886 struct kvm_s390_vm_cpu_processor *proc;
887 int ret = 0;
888
889 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
890 if (!proc) {
891 ret = -ENOMEM;
892 goto out;
893 }
894 proc->cpuid = kvm->arch.model.cpuid;
895 proc->ibc = kvm->arch.model.ibc;
896 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
897 S390_ARCH_FAC_LIST_SIZE_BYTE);
898 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
899 ret = -EFAULT;
900 kfree(proc);
901 out:
902 return ret;
903 }
904
905 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
906 {
907 struct kvm_s390_vm_cpu_machine *mach;
908 int ret = 0;
909
910 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
911 if (!mach) {
912 ret = -ENOMEM;
913 goto out;
914 }
915 get_cpu_id((struct cpuid *) &mach->cpuid);
916 mach->ibc = sclp.ibc;
917 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
918 S390_ARCH_FAC_LIST_SIZE_BYTE);
919 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
920 sizeof(S390_lowcore.stfle_fac_list));
921 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
922 ret = -EFAULT;
923 kfree(mach);
924 out:
925 return ret;
926 }
927
928 static int kvm_s390_get_processor_feat(struct kvm *kvm,
929 struct kvm_device_attr *attr)
930 {
931 struct kvm_s390_vm_cpu_feat data;
932
933 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
934 KVM_S390_VM_CPU_FEAT_NR_BITS);
935 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
936 return -EFAULT;
937 return 0;
938 }
939
940 static int kvm_s390_get_machine_feat(struct kvm *kvm,
941 struct kvm_device_attr *attr)
942 {
943 struct kvm_s390_vm_cpu_feat data;
944
945 bitmap_copy((unsigned long *) data.feat,
946 kvm_s390_available_cpu_feat,
947 KVM_S390_VM_CPU_FEAT_NR_BITS);
948 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
949 return -EFAULT;
950 return 0;
951 }
952
/*
 * Subfunctions cannot be configured yet, so there is nothing to report and
 * this always fails with -ENXIO. Once we can actually configure them
 * (kernel + hw support), we have to check if they were already set by user
 * space and, if so, copy them from kvm->arch.
 */
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	return -ENXIO;
}
963
964 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
965 struct kvm_device_attr *attr)
966 {
967 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
968 sizeof(struct kvm_s390_vm_cpu_subfunc)))
969 return -EFAULT;
970 return 0;
971 }
972 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
973 {
974 int ret = -ENXIO;
975
976 switch (attr->attr) {
977 case KVM_S390_VM_CPU_PROCESSOR:
978 ret = kvm_s390_get_processor(kvm, attr);
979 break;
980 case KVM_S390_VM_CPU_MACHINE:
981 ret = kvm_s390_get_machine(kvm, attr);
982 break;
983 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
984 ret = kvm_s390_get_processor_feat(kvm, attr);
985 break;
986 case KVM_S390_VM_CPU_MACHINE_FEAT:
987 ret = kvm_s390_get_machine_feat(kvm, attr);
988 break;
989 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
990 ret = kvm_s390_get_processor_subfunc(kvm, attr);
991 break;
992 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
993 ret = kvm_s390_get_machine_subfunc(kvm, attr);
994 break;
995 }
996 return ret;
997 }
998
999 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1000 {
1001 int ret;
1002
1003 switch (attr->group) {
1004 case KVM_S390_VM_MEM_CTRL:
1005 ret = kvm_s390_set_mem_control(kvm, attr);
1006 break;
1007 case KVM_S390_VM_TOD:
1008 ret = kvm_s390_set_tod(kvm, attr);
1009 break;
1010 case KVM_S390_VM_CPU_MODEL:
1011 ret = kvm_s390_set_cpu_model(kvm, attr);
1012 break;
1013 case KVM_S390_VM_CRYPTO:
1014 ret = kvm_s390_vm_set_crypto(kvm, attr);
1015 break;
1016 default:
1017 ret = -ENXIO;
1018 break;
1019 }
1020
1021 return ret;
1022 }
1023
1024 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1025 {
1026 int ret;
1027
1028 switch (attr->group) {
1029 case KVM_S390_VM_MEM_CTRL:
1030 ret = kvm_s390_get_mem_control(kvm, attr);
1031 break;
1032 case KVM_S390_VM_TOD:
1033 ret = kvm_s390_get_tod(kvm, attr);
1034 break;
1035 case KVM_S390_VM_CPU_MODEL:
1036 ret = kvm_s390_get_cpu_model(kvm, attr);
1037 break;
1038 default:
1039 ret = -ENXIO;
1040 break;
1041 }
1042
1043 return ret;
1044 }
1045
1046 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1047 {
1048 int ret;
1049
1050 switch (attr->group) {
1051 case KVM_S390_VM_MEM_CTRL:
1052 switch (attr->attr) {
1053 case KVM_S390_VM_MEM_ENABLE_CMMA:
1054 case KVM_S390_VM_MEM_CLR_CMMA:
1055 ret = sclp.has_cmma ? 0 : -ENXIO;
1056 break;
1057 case KVM_S390_VM_MEM_LIMIT_SIZE:
1058 ret = 0;
1059 break;
1060 default:
1061 ret = -ENXIO;
1062 break;
1063 }
1064 break;
1065 case KVM_S390_VM_TOD:
1066 switch (attr->attr) {
1067 case KVM_S390_VM_TOD_LOW:
1068 case KVM_S390_VM_TOD_HIGH:
1069 ret = 0;
1070 break;
1071 default:
1072 ret = -ENXIO;
1073 break;
1074 }
1075 break;
1076 case KVM_S390_VM_CPU_MODEL:
1077 switch (attr->attr) {
1078 case KVM_S390_VM_CPU_PROCESSOR:
1079 case KVM_S390_VM_CPU_MACHINE:
1080 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1081 case KVM_S390_VM_CPU_MACHINE_FEAT:
1082 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1083 ret = 0;
1084 break;
1085 /* configuring subfunctions is not supported yet */
1086 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1087 default:
1088 ret = -ENXIO;
1089 break;
1090 }
1091 break;
1092 case KVM_S390_VM_CRYPTO:
1093 switch (attr->attr) {
1094 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1095 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1096 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1097 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1098 ret = 0;
1099 break;
1100 default:
1101 ret = -ENXIO;
1102 break;
1103 }
1104 break;
1105 default:
1106 ret = -ENXIO;
1107 break;
1108 }
1109
1110 return ret;
1111 }
1112
1113 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1114 {
1115 uint8_t *keys;
1116 uint64_t hva;
1117 int i, r = 0;
1118
1119 if (args->flags != 0)
1120 return -EINVAL;
1121
1122 /* Is this guest using storage keys? */
1123 if (!mm_use_skey(current->mm))
1124 return KVM_S390_GET_SKEYS_NONE;
1125
1126 /* Enforce sane limit on memory allocation */
1127 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1128 return -EINVAL;
1129
1130 keys = kmalloc_array(args->count, sizeof(uint8_t),
1131 GFP_KERNEL | __GFP_NOWARN);
1132 if (!keys)
1133 keys = vmalloc(sizeof(uint8_t) * args->count);
1134 if (!keys)
1135 return -ENOMEM;
1136
1137 down_read(&current->mm->mmap_sem);
1138 for (i = 0; i < args->count; i++) {
1139 hva = gfn_to_hva(kvm, args->start_gfn + i);
1140 if (kvm_is_error_hva(hva)) {
1141 r = -EFAULT;
1142 break;
1143 }
1144
1145 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1146 if (r)
1147 break;
1148 }
1149 up_read(&current->mm->mmap_sem);
1150
1151 if (!r) {
1152 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1153 sizeof(uint8_t) * args->count);
1154 if (r)
1155 r = -EFAULT;
1156 }
1157
1158 kvfree(keys);
1159 return r;
1160 }
1161
1162 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1163 {
1164 uint8_t *keys;
1165 uint64_t hva;
1166 int i, r = 0;
1167
1168 if (args->flags != 0)
1169 return -EINVAL;
1170
1171 /* Enforce sane limit on memory allocation */
1172 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1173 return -EINVAL;
1174
1175 keys = kmalloc_array(args->count, sizeof(uint8_t),
1176 GFP_KERNEL | __GFP_NOWARN);
1177 if (!keys)
1178 keys = vmalloc(sizeof(uint8_t) * args->count);
1179 if (!keys)
1180 return -ENOMEM;
1181
1182 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1183 sizeof(uint8_t) * args->count);
1184 if (r) {
1185 r = -EFAULT;
1186 goto out;
1187 }
1188
1189 /* Enable storage key handling for the guest */
1190 r = s390_enable_skey();
1191 if (r)
1192 goto out;
1193
1194 down_read(&current->mm->mmap_sem);
1195 for (i = 0; i < args->count; i++) {
1196 hva = gfn_to_hva(kvm, args->start_gfn + i);
1197 if (kvm_is_error_hva(hva)) {
1198 r = -EFAULT;
1199 break;
1200 }
1201
1202 /* Lowest order bit is reserved */
1203 if (keys[i] & 0x01) {
1204 r = -EINVAL;
1205 break;
1206 }
1207
1208 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1209 if (r)
1210 break;
1211 }
1212 up_read(&current->mm->mmap_sem);
1213 out:
1214 kvfree(keys);
1215 return r;
1216 }
1217
1218 long kvm_arch_vm_ioctl(struct file *filp,
1219 unsigned int ioctl, unsigned long arg)
1220 {
1221 struct kvm *kvm = filp->private_data;
1222 void __user *argp = (void __user *)arg;
1223 struct kvm_device_attr attr;
1224 int r;
1225
1226 switch (ioctl) {
1227 case KVM_S390_INTERRUPT: {
1228 struct kvm_s390_interrupt s390int;
1229
1230 r = -EFAULT;
1231 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1232 break;
1233 r = kvm_s390_inject_vm(kvm, &s390int);
1234 break;
1235 }
1236 case KVM_ENABLE_CAP: {
1237 struct kvm_enable_cap cap;
1238 r = -EFAULT;
1239 if (copy_from_user(&cap, argp, sizeof(cap)))
1240 break;
1241 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1242 break;
1243 }
1244 case KVM_CREATE_IRQCHIP: {
1245 struct kvm_irq_routing_entry routing;
1246
1247 r = -EINVAL;
1248 if (kvm->arch.use_irqchip) {
1249 /* Set up dummy routing. */
1250 memset(&routing, 0, sizeof(routing));
1251 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1252 }
1253 break;
1254 }
1255 case KVM_SET_DEVICE_ATTR: {
1256 r = -EFAULT;
1257 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1258 break;
1259 r = kvm_s390_vm_set_attr(kvm, &attr);
1260 break;
1261 }
1262 case KVM_GET_DEVICE_ATTR: {
1263 r = -EFAULT;
1264 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1265 break;
1266 r = kvm_s390_vm_get_attr(kvm, &attr);
1267 break;
1268 }
1269 case KVM_HAS_DEVICE_ATTR: {
1270 r = -EFAULT;
1271 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1272 break;
1273 r = kvm_s390_vm_has_attr(kvm, &attr);
1274 break;
1275 }
1276 case KVM_S390_GET_SKEYS: {
1277 struct kvm_s390_skeys args;
1278
1279 r = -EFAULT;
1280 if (copy_from_user(&args, argp,
1281 sizeof(struct kvm_s390_skeys)))
1282 break;
1283 r = kvm_s390_get_skeys(kvm, &args);
1284 break;
1285 }
1286 case KVM_S390_SET_SKEYS: {
1287 struct kvm_s390_skeys args;
1288
1289 r = -EFAULT;
1290 if (copy_from_user(&args, argp,
1291 sizeof(struct kvm_s390_skeys)))
1292 break;
1293 r = kvm_s390_set_skeys(kvm, &args);
1294 break;
1295 }
1296 default:
1297 r = -ENOTTY;
1298 }
1299
1300 return r;
1301 }
1302
1303 static int kvm_s390_query_ap_config(u8 *config)
1304 {
1305 u32 fcn_code = 0x04000000UL;
1306 u32 cc = 0;
1307
1308 memset(config, 0, 128);
1309 asm volatile(
1310 "lgr 0,%1\n"
1311 "lgr 2,%2\n"
1312 ".long 0xb2af0000\n" /* PQAP(QCI) */
1313 "0: ipm %0\n"
1314 "srl %0,28\n"
1315 "1:\n"
1316 EX_TABLE(0b, 1b)
1317 : "+r" (cc)
1318 : "r" (fcn_code), "r" (config)
1319 : "cc", "0", "2", "memory"
1320 );
1321
1322 return cc;
1323 }
1324
1325 static int kvm_s390_apxa_installed(void)
1326 {
1327 u8 config[128];
1328 int cc;
1329
1330 if (test_facility(12)) {
1331 cc = kvm_s390_query_ap_config(config);
1332
1333 if (cc)
1334 pr_err("PQAP(QCI) failed with cc=%d", cc);
1335 else
1336 return config[0] & 0x40;
1337 }
1338
1339 return 0;
1340 }
1341
1342 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1343 {
1344 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1345
1346 if (kvm_s390_apxa_installed())
1347 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1348 else
1349 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1350 }
1351
1352 static u64 kvm_s390_get_initial_cpuid(void)
1353 {
1354 struct cpuid cpuid;
1355
1356 get_cpu_id(&cpuid);
1357 cpuid.version = 0xff;
1358 return *((u64 *) &cpuid);
1359 }
1360
1361 static void kvm_s390_crypto_init(struct kvm *kvm)
1362 {
1363 if (!test_kvm_facility(kvm, 76))
1364 return;
1365
1366 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1367 kvm_s390_set_crycb_format(kvm);
1368
1369 /* Enable AES/DEA protected key functions by default */
1370 kvm->arch.crypto.aes_kw = 1;
1371 kvm->arch.crypto.dea_kw = 1;
1372 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1373 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1374 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1375 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1376 }
1377
1378 static void sca_dispose(struct kvm *kvm)
1379 {
1380 if (kvm->arch.use_esca)
1381 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1382 else
1383 free_page((unsigned long)(kvm->arch.sca));
1384 kvm->arch.sca = NULL;
1385 }
1386
1387 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1388 {
1389 gfp_t alloc_flags = GFP_KERNEL;
1390 int i, rc;
1391 char debug_name[16];
1392 static unsigned long sca_offset;
1393
1394 rc = -EINVAL;
1395 #ifdef CONFIG_KVM_S390_UCONTROL
1396 if (type & ~KVM_VM_S390_UCONTROL)
1397 goto out_err;
1398 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1399 goto out_err;
1400 #else
1401 if (type)
1402 goto out_err;
1403 #endif
1404
1405 rc = s390_enable_sie();
1406 if (rc)
1407 goto out_err;
1408
1409 rc = -ENOMEM;
1410
1411 ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1412
1413 kvm->arch.use_esca = 0; /* start with basic SCA */
1414 if (!sclp.has_64bscao)
1415 alloc_flags |= GFP_DMA;
1416 rwlock_init(&kvm->arch.sca_lock);
1417 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1418 if (!kvm->arch.sca)
1419 goto out_err;
1420 spin_lock(&kvm_lock);
1421 sca_offset += 16;
1422 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1423 sca_offset = 0;
1424 kvm->arch.sca = (struct bsca_block *)
1425 ((char *) kvm->arch.sca + sca_offset);
1426 spin_unlock(&kvm_lock);
1427
1428 sprintf(debug_name, "kvm-%u", current->pid);
1429
1430 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1431 if (!kvm->arch.dbf)
1432 goto out_err;
1433
1434 kvm->arch.sie_page2 =
1435 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1436 if (!kvm->arch.sie_page2)
1437 goto out_err;
1438
1439 /* Populate the facility mask initially. */
1440 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1441 sizeof(S390_lowcore.stfle_fac_list));
1442 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1443 if (i < kvm_s390_fac_list_mask_size())
1444 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1445 else
1446 kvm->arch.model.fac_mask[i] = 0UL;
1447 }
1448
1449 /* Populate the facility list initially. */
1450 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1451 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1452 S390_ARCH_FAC_LIST_SIZE_BYTE);
1453
1454 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1455 set_kvm_facility(kvm->arch.model.fac_list, 74);
1456
1457 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1458 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1459
1460 kvm_s390_crypto_init(kvm);
1461
1462 spin_lock_init(&kvm->arch.float_int.lock);
1463 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1464 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1465 init_waitqueue_head(&kvm->arch.ipte_wq);
1466 mutex_init(&kvm->arch.ipte_mutex);
1467
1468 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1469 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1470
1471 if (type & KVM_VM_S390_UCONTROL) {
1472 kvm->arch.gmap = NULL;
1473 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1474 } else {
1475 if (sclp.hamax == U64_MAX)
1476 kvm->arch.mem_limit = TASK_MAX_SIZE;
1477 else
1478 kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1479 sclp.hamax + 1);
1480 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1481 if (!kvm->arch.gmap)
1482 goto out_err;
1483 kvm->arch.gmap->private = kvm;
1484 kvm->arch.gmap->pfault_enabled = 0;
1485 }
1486
1487 kvm->arch.css_support = 0;
1488 kvm->arch.use_irqchip = 0;
1489 kvm->arch.epoch = 0;
1490
1491 spin_lock_init(&kvm->arch.start_stop_lock);
1492 kvm_s390_vsie_init(kvm);
1493 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1494
1495 return 0;
1496 out_err:
1497 free_page((unsigned long)kvm->arch.sie_page2);
1498 debug_unregister(kvm->arch.dbf);
1499 sca_dispose(kvm);
1500 KVM_EVENT(3, "creation of vm failed: %d", rc);
1501 return rc;
1502 }
1503
/* This architecture provides no per-vcpu debugfs entries. */
bool kvm_arch_has_vcpu_debugfs(void)
{
	const bool has_debugfs = false;

	return has_debugfs;
}
1508
/* Nothing to create for a vcpu; always reports success (0). */
int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	const int rc = 0;

	return rc;
}
1513
1514 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1515 {
1516 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1517 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1518 kvm_s390_clear_local_irqs(vcpu);
1519 kvm_clear_async_pf_completion_queue(vcpu);
1520 if (!kvm_is_ucontrol(vcpu->kvm))
1521 sca_del_vcpu(vcpu);
1522
1523 if (kvm_is_ucontrol(vcpu->kvm))
1524 gmap_remove(vcpu->arch.gmap);
1525
1526 if (vcpu->kvm->arch.use_cmma)
1527 kvm_s390_vcpu_unsetup_cmma(vcpu);
1528 free_page((unsigned long)(vcpu->arch.sie_block));
1529
1530 kvm_vcpu_uninit(vcpu);
1531 kmem_cache_free(kvm_vcpu_cache, vcpu);
1532 }
1533
1534 static void kvm_free_vcpus(struct kvm *kvm)
1535 {
1536 unsigned int i;
1537 struct kvm_vcpu *vcpu;
1538
1539 kvm_for_each_vcpu(i, vcpu, kvm)
1540 kvm_arch_vcpu_destroy(vcpu);
1541
1542 mutex_lock(&kvm->lock);
1543 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1544 kvm->vcpus[i] = NULL;
1545
1546 atomic_set(&kvm->online_vcpus, 0);
1547 mutex_unlock(&kvm->lock);
1548 }
1549
1550 void kvm_arch_destroy_vm(struct kvm *kvm)
1551 {
1552 kvm_free_vcpus(kvm);
1553 sca_dispose(kvm);
1554 debug_unregister(kvm->arch.dbf);
1555 free_page((unsigned long)kvm->arch.sie_page2);
1556 if (!kvm_is_ucontrol(kvm))
1557 gmap_remove(kvm->arch.gmap);
1558 kvm_s390_destroy_adapters(kvm);
1559 kvm_s390_clear_float_irqs(kvm);
1560 kvm_s390_vsie_destroy(kvm);
1561 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1562 }
1563
1564 /* Section: vcpu related */
1565 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1566 {
1567 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1568 if (!vcpu->arch.gmap)
1569 return -ENOMEM;
1570 vcpu->arch.gmap->private = vcpu->kvm;
1571
1572 return 0;
1573 }
1574
1575 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1576 {
1577 if (!kvm_s390_use_sca_entries())
1578 return;
1579 read_lock(&vcpu->kvm->arch.sca_lock);
1580 if (vcpu->kvm->arch.use_esca) {
1581 struct esca_block *sca = vcpu->kvm->arch.sca;
1582
1583 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1584 sca->cpu[vcpu->vcpu_id].sda = 0;
1585 } else {
1586 struct bsca_block *sca = vcpu->kvm->arch.sca;
1587
1588 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1589 sca->cpu[vcpu->vcpu_id].sda = 0;
1590 }
1591 read_unlock(&vcpu->kvm->arch.sca_lock);
1592 }
1593
1594 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1595 {
1596 if (!kvm_s390_use_sca_entries()) {
1597 struct bsca_block *sca = vcpu->kvm->arch.sca;
1598
1599 /* we still need the basic sca for the ipte control */
1600 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1601 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1602 }
1603 read_lock(&vcpu->kvm->arch.sca_lock);
1604 if (vcpu->kvm->arch.use_esca) {
1605 struct esca_block *sca = vcpu->kvm->arch.sca;
1606
1607 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1608 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1609 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1610 vcpu->arch.sie_block->ecb2 |= 0x04U;
1611 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1612 } else {
1613 struct bsca_block *sca = vcpu->kvm->arch.sca;
1614
1615 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1616 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1617 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1618 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1619 }
1620 read_unlock(&vcpu->kvm->arch.sca_lock);
1621 }
1622
1623 /* Basic SCA to Extended SCA data copy routines */
1624 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1625 {
1626 d->sda = s->sda;
1627 d->sigp_ctrl.c = s->sigp_ctrl.c;
1628 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1629 }
1630
1631 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1632 {
1633 int i;
1634
1635 d->ipte_control = s->ipte_control;
1636 d->mcn[0] = s->mcn;
1637 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1638 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1639 }
1640
1641 static int sca_switch_to_extended(struct kvm *kvm)
1642 {
1643 struct bsca_block *old_sca = kvm->arch.sca;
1644 struct esca_block *new_sca;
1645 struct kvm_vcpu *vcpu;
1646 unsigned int vcpu_idx;
1647 u32 scaol, scaoh;
1648
1649 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1650 if (!new_sca)
1651 return -ENOMEM;
1652
1653 scaoh = (u32)((u64)(new_sca) >> 32);
1654 scaol = (u32)(u64)(new_sca) & ~0x3fU;
1655
1656 kvm_s390_vcpu_block_all(kvm);
1657 write_lock(&kvm->arch.sca_lock);
1658
1659 sca_copy_b_to_e(new_sca, old_sca);
1660
1661 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1662 vcpu->arch.sie_block->scaoh = scaoh;
1663 vcpu->arch.sie_block->scaol = scaol;
1664 vcpu->arch.sie_block->ecb2 |= 0x04U;
1665 }
1666 kvm->arch.sca = new_sca;
1667 kvm->arch.use_esca = 1;
1668
1669 write_unlock(&kvm->arch.sca_lock);
1670 kvm_s390_vcpu_unblock_all(kvm);
1671
1672 free_page((unsigned long)old_sca);
1673
1674 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1675 old_sca, kvm->arch.sca);
1676 return 0;
1677 }
1678
1679 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1680 {
1681 int rc;
1682
1683 if (!kvm_s390_use_sca_entries()) {
1684 if (id < KVM_MAX_VCPUS)
1685 return true;
1686 return false;
1687 }
1688 if (id < KVM_S390_BSCA_CPU_SLOTS)
1689 return true;
1690 if (!sclp.has_esca || !sclp.has_64bscao)
1691 return false;
1692
1693 mutex_lock(&kvm->lock);
1694 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1695 mutex_unlock(&kvm->lock);
1696
1697 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1698 }
1699
1700 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1701 {
1702 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1703 kvm_clear_async_pf_completion_queue(vcpu);
1704 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1705 KVM_SYNC_GPRS |
1706 KVM_SYNC_ACRS |
1707 KVM_SYNC_CRS |
1708 KVM_SYNC_ARCH0 |
1709 KVM_SYNC_PFAULT;
1710 kvm_s390_set_prefix(vcpu, 0);
1711 if (test_kvm_facility(vcpu->kvm, 64))
1712 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1713 /* fprs can be synchronized via vrs, even if the guest has no vx. With
1714 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1715 */
1716 if (MACHINE_HAS_VX)
1717 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1718 else
1719 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1720
1721 if (kvm_is_ucontrol(vcpu->kvm))
1722 return __kvm_ucontrol_vcpu_init(vcpu);
1723
1724 return 0;
1725 }
1726
1727 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1728 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1729 {
1730 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1731 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1732 vcpu->arch.cputm_start = get_tod_clock_fast();
1733 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1734 }
1735
1736 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1737 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1738 {
1739 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1740 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1741 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1742 vcpu->arch.cputm_start = 0;
1743 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1744 }
1745
1746 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1747 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1748 {
1749 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1750 vcpu->arch.cputm_enabled = true;
1751 __start_cpu_timer_accounting(vcpu);
1752 }
1753
1754 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1755 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1756 {
1757 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1758 __stop_cpu_timer_accounting(vcpu);
1759 vcpu->arch.cputm_enabled = false;
1760 }
1761
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
1768
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
1775
1776 /* set the cpu timer - may only be called from the VCPU thread itself */
1777 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1778 {
1779 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1780 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1781 if (vcpu->arch.cputm_enabled)
1782 vcpu->arch.cputm_start = get_tod_clock_fast();
1783 vcpu->arch.sie_block->cputm = cputm;
1784 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1785 preempt_enable();
1786 }
1787
1788 /* update and get the cpu timer - can also be called from other VCPU threads */
1789 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1790 {
1791 unsigned int seq;
1792 __u64 value;
1793
1794 if (unlikely(!vcpu->arch.cputm_enabled))
1795 return vcpu->arch.sie_block->cputm;
1796
1797 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1798 do {
1799 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1800 /*
1801 * If the writer would ever execute a read in the critical
1802 * section, e.g. in irq context, we have a deadlock.
1803 */
1804 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1805 value = vcpu->arch.sie_block->cputm;
1806 /* if cputm_start is 0, accounting is being started/stopped */
1807 if (likely(vcpu->arch.cputm_start))
1808 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1809 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1810 preempt_enable();
1811 return value;
1812 }
1813
1814 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1815 {
1816
1817 gmap_enable(vcpu->arch.enabled_gmap);
1818 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1819 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1820 __start_cpu_timer_accounting(vcpu);
1821 vcpu->cpu = cpu;
1822 }
1823
1824 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1825 {
1826 vcpu->cpu = -1;
1827 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1828 __stop_cpu_timer_accounting(vcpu);
1829 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1830 vcpu->arch.enabled_gmap = gmap_get_enabled();
1831 gmap_disable(vcpu->arch.enabled_gmap);
1832
1833 }
1834
1835 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1836 {
1837 /* this equals initial cpu reset in pop, but we don't switch to ESA */
1838 vcpu->arch.sie_block->gpsw.mask = 0UL;
1839 vcpu->arch.sie_block->gpsw.addr = 0UL;
1840 kvm_s390_set_prefix(vcpu, 0);
1841 kvm_s390_set_cpu_timer(vcpu, 0);
1842 vcpu->arch.sie_block->ckc = 0UL;
1843 vcpu->arch.sie_block->todpr = 0;
1844 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1845 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
1846 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1847 /* make sure the new fpc will be lazily loaded */
1848 save_fpu_regs();
1849 current->thread.fpu.fpc = 0;
1850 vcpu->arch.sie_block->gbea = 1;
1851 vcpu->arch.sie_block->pp = 0;
1852 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1853 kvm_clear_async_pf_completion_queue(vcpu);
1854 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1855 kvm_s390_vcpu_stop(vcpu);
1856 kvm_s390_clear_local_irqs(vcpu);
1857 }
1858
1859 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1860 {
1861 mutex_lock(&vcpu->kvm->lock);
1862 preempt_disable();
1863 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1864 preempt_enable();
1865 mutex_unlock(&vcpu->kvm->lock);
1866 if (!kvm_is_ucontrol(vcpu->kvm)) {
1867 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1868 sca_add_vcpu(vcpu);
1869 }
1870 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1871 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1872 /* make vcpu_load load the right gmap on the first trigger */
1873 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1874 }
1875
1876 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1877 {
1878 if (!test_kvm_facility(vcpu->kvm, 76))
1879 return;
1880
1881 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1882
1883 if (vcpu->kvm->arch.crypto.aes_kw)
1884 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1885 if (vcpu->kvm->arch.crypto.dea_kw)
1886 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1887
1888 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1889 }
1890
1891 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1892 {
1893 free_page(vcpu->arch.sie_block->cbrlo);
1894 vcpu->arch.sie_block->cbrlo = 0;
1895 }
1896
1897 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1898 {
1899 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1900 if (!vcpu->arch.sie_block->cbrlo)
1901 return -ENOMEM;
1902
1903 vcpu->arch.sie_block->ecb2 |= 0x80;
1904 vcpu->arch.sie_block->ecb2 &= ~0x08;
1905 return 0;
1906 }
1907
1908 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1909 {
1910 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1911
1912 vcpu->arch.sie_block->ibc = model->ibc;
1913 if (test_kvm_facility(vcpu->kvm, 7))
1914 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1915 }
1916
1917 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1918 {
1919 int rc = 0;
1920
1921 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1922 CPUSTAT_SM |
1923 CPUSTAT_STOPPED);
1924
1925 if (test_kvm_facility(vcpu->kvm, 78))
1926 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1927 else if (test_kvm_facility(vcpu->kvm, 8))
1928 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1929
1930 kvm_s390_vcpu_setup_model(vcpu);
1931
1932 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1933 if (MACHINE_HAS_ESOP)
1934 vcpu->arch.sie_block->ecb |= 0x02;
1935 if (test_kvm_facility(vcpu->kvm, 9))
1936 vcpu->arch.sie_block->ecb |= 0x04;
1937 if (test_kvm_facility(vcpu->kvm, 73))
1938 vcpu->arch.sie_block->ecb |= 0x10;
1939
1940 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1941 vcpu->arch.sie_block->ecb2 |= 0x08;
1942 vcpu->arch.sie_block->eca = 0x1002000U;
1943 if (sclp.has_cei)
1944 vcpu->arch.sie_block->eca |= 0x80000000U;
1945 if (sclp.has_ib)
1946 vcpu->arch.sie_block->eca |= 0x40000000U;
1947 if (sclp.has_siif)
1948 vcpu->arch.sie_block->eca |= 1;
1949 if (sclp.has_sigpif)
1950 vcpu->arch.sie_block->eca |= 0x10000000U;
1951 if (test_kvm_facility(vcpu->kvm, 129)) {
1952 vcpu->arch.sie_block->eca |= 0x00020000;
1953 vcpu->arch.sie_block->ecd |= 0x20000000;
1954 }
1955 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1956 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1957
1958 if (vcpu->kvm->arch.use_cmma) {
1959 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1960 if (rc)
1961 return rc;
1962 }
1963 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1964 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1965
1966 kvm_s390_vcpu_crypto_setup(vcpu);
1967
1968 return rc;
1969 }
1970
1971 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1972 unsigned int id)
1973 {
1974 struct kvm_vcpu *vcpu;
1975 struct sie_page *sie_page;
1976 int rc = -EINVAL;
1977
1978 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1979 goto out;
1980
1981 rc = -ENOMEM;
1982
1983 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1984 if (!vcpu)
1985 goto out;
1986
1987 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1988 if (!sie_page)
1989 goto out_free_cpu;
1990
1991 vcpu->arch.sie_block = &sie_page->sie_block;
1992 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1993
1994 /* the real guest size will always be smaller than msl */
1995 vcpu->arch.sie_block->mso = 0;
1996 vcpu->arch.sie_block->msl = sclp.hamax;
1997
1998 vcpu->arch.sie_block->icpua = id;
1999 spin_lock_init(&vcpu->arch.local_int.lock);
2000 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2001 vcpu->arch.local_int.wq = &vcpu->wq;
2002 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2003 seqcount_init(&vcpu->arch.cputm_seqcount);
2004
2005 rc = kvm_vcpu_init(vcpu, kvm, id);
2006 if (rc)
2007 goto out_free_sie_block;
2008 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2009 vcpu->arch.sie_block);
2010 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2011
2012 return vcpu;
2013 out_free_sie_block:
2014 free_page((unsigned long)(vcpu->arch.sie_block));
2015 out_free_cpu:
2016 kmem_cache_free(kvm_vcpu_cache, vcpu);
2017 out:
2018 return ERR_PTR(rc);
2019 }
2020
/* A vcpu is runnable when kvm_s390_vcpu_has_irq() reports a pending interrupt. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

	return has_irq;
}
2025
2026 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2027 {
2028 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2029 exit_sie(vcpu);
2030 }
2031
2032 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2033 {
2034 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2035 }
2036
2037 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2038 {
2039 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2040 exit_sie(vcpu);
2041 }
2042
2043 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2044 {
2045 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2046 }
2047
2048 /*
2049 * Kick a guest cpu out of SIE and wait until SIE is not running.
2050 * If the CPU is not running (e.g. waiting as idle) the function will
2051 * return immediately. */
2052 void exit_sie(struct kvm_vcpu *vcpu)
2053 {
2054 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2055 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2056 cpu_relax();
2057 }
2058
/*
 * Post request @req to the vcpu and kick it out of SIE so the request is
 * processed synchronously.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	/* The request must be visible before the vcpu is forced out of SIE. */
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
2065
2066 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2067 unsigned long end)
2068 {
2069 struct kvm *kvm = gmap->private;
2070 struct kvm_vcpu *vcpu;
2071 unsigned long prefix;
2072 int i;
2073
2074 if (gmap_is_shadow(gmap))
2075 return;
2076 if (start >= 1UL << 31)
2077 /* We are only interested in prefix pages */
2078 return;
2079 kvm_for_each_vcpu(i, vcpu, kvm) {
2080 /* match against both prefix pages */
2081 prefix = kvm_s390_get_prefix(vcpu);
2082 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2083 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2084 start, end);
2085 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2086 }
2087 }
2088 }
2089
/*
 * Only exists to satisfy the reference from kvm common code, which never
 * calls it; reaching this function is treated as a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();

	return 0;
}
2096
2097 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2098 struct kvm_one_reg *reg)
2099 {
2100 int r = -EINVAL;
2101
2102 switch (reg->id) {
2103 case KVM_REG_S390_TODPR:
2104 r = put_user(vcpu->arch.sie_block->todpr,
2105 (u32 __user *)reg->addr);
2106 break;
2107 case KVM_REG_S390_EPOCHDIFF:
2108 r = put_user(vcpu->arch.sie_block->epoch,
2109 (u64 __user *)reg->addr);
2110 break;
2111 case KVM_REG_S390_CPU_TIMER:
2112 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2113 (u64 __user *)reg->addr);
2114 break;
2115 case KVM_REG_S390_CLOCK_COMP:
2116 r = put_user(vcpu->arch.sie_block->ckc,
2117 (u64 __user *)reg->addr);
2118 break;
2119 case KVM_REG_S390_PFTOKEN:
2120 r = put_user(vcpu->arch.pfault_token,
2121 (u64 __user *)reg->addr);
2122 break;
2123 case KVM_REG_S390_PFCOMPARE:
2124 r = put_user(vcpu->arch.pfault_compare,
2125 (u64 __user *)reg->addr);
2126 break;
2127 case KVM_REG_S390_PFSELECT:
2128 r = put_user(vcpu->arch.pfault_select,
2129 (u64 __user *)reg->addr);
2130 break;
2131 case KVM_REG_S390_PP:
2132 r = put_user(vcpu->arch.sie_block->pp,
2133 (u64 __user *)reg->addr);
2134 break;
2135 case KVM_REG_S390_GBEA:
2136 r = put_user(vcpu->arch.sie_block->gbea,
2137 (u64 __user *)reg->addr);
2138 break;
2139 default:
2140 break;
2141 }
2142
2143 return r;
2144 }
2145
2146 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2147 struct kvm_one_reg *reg)
2148 {
2149 int r = -EINVAL;
2150 __u64 val;
2151
2152 switch (reg->id) {
2153 case KVM_REG_S390_TODPR:
2154 r = get_user(vcpu->arch.sie_block->todpr,
2155 (u32 __user *)reg->addr);
2156 break;
2157 case KVM_REG_S390_EPOCHDIFF:
2158 r = get_user(vcpu->arch.sie_block->epoch,
2159 (u64 __user *)reg->addr);
2160 break;
2161 case KVM_REG_S390_CPU_TIMER:
2162 r = get_user(val, (u64 __user *)reg->addr);
2163 if (!r)
2164 kvm_s390_set_cpu_timer(vcpu, val);
2165 break;
2166 case KVM_REG_S390_CLOCK_COMP:
2167 r = get_user(vcpu->arch.sie_block->ckc,
2168 (u64 __user *)reg->addr);
2169 break;
2170 case KVM_REG_S390_PFTOKEN:
2171 r = get_user(vcpu->arch.pfault_token,
2172 (u64 __user *)reg->addr);
2173 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2174 kvm_clear_async_pf_completion_queue(vcpu);
2175 break;
2176 case KVM_REG_S390_PFCOMPARE:
2177 r = get_user(vcpu->arch.pfault_compare,
2178 (u64 __user *)reg->addr);
2179 break;
2180 case KVM_REG_S390_PFSELECT:
2181 r = get_user(vcpu->arch.pfault_select,
2182 (u64 __user *)reg->addr);
2183 break;
2184 case KVM_REG_S390_PP:
2185 r = get_user(vcpu->arch.sie_block->pp,
2186 (u64 __user *)reg->addr);
2187 break;
2188 case KVM_REG_S390_GBEA:
2189 r = get_user(vcpu->arch.sie_block->gbea,
2190 (u64 __user *)reg->addr);
2191 break;
2192 default:
2193 break;
2194 }
2195
2196 return r;
2197 }
2198
/*
 * ioctl wrapper around kvm_s390_vcpu_initial_reset(); always reports
 * success.
 */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);

	return 0;
}
2204
2205 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2206 {
2207 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2208 return 0;
2209 }
2210
2211 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2212 {
2213 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2214 return 0;
2215 }
2216
2217 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2218 struct kvm_sregs *sregs)
2219 {
2220 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2221 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2222 return 0;
2223 }
2224
2225 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2226 struct kvm_sregs *sregs)
2227 {
2228 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2229 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2230 return 0;
2231 }
2232
2233 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2234 {
2235 if (test_fp_ctl(fpu->fpc))
2236 return -EINVAL;
2237 vcpu->run->s.regs.fpc = fpu->fpc;
2238 if (MACHINE_HAS_VX)
2239 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2240 (freg_t *) fpu->fprs);
2241 else
2242 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2243 return 0;
2244 }
2245
2246 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2247 {
2248 /* make sure we have the latest values */
2249 save_fpu_regs();
2250 if (MACHINE_HAS_VX)
2251 convert_vx_to_fp((freg_t *) fpu->fprs,
2252 (__vector128 *) vcpu->run->s.regs.vrs);
2253 else
2254 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2255 fpu->fpc = vcpu->run->s.regs.fpc;
2256 return 0;
2257 }
2258
2259 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2260 {
2261 int rc = 0;
2262
2263 if (!is_vcpu_stopped(vcpu))
2264 rc = -EBUSY;
2265 else {
2266 vcpu->run->psw_mask = psw.mask;
2267 vcpu->run->psw_addr = psw.addr;
2268 }
2269 return rc;
2270 }
2271
2272 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2273 struct kvm_translation *tr)
2274 {
2275 return -EINVAL; /* not implemented yet */
2276 }
2277
2278 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2279 KVM_GUESTDBG_USE_HW_BP | \
2280 KVM_GUESTDBG_ENABLE)
2281
2282 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2283 struct kvm_guest_debug *dbg)
2284 {
2285 int rc = 0;
2286
2287 vcpu->guest_debug = 0;
2288 kvm_s390_clear_bp_data(vcpu);
2289
2290 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2291 return -EINVAL;
2292 if (!sclp.has_gpere)
2293 return -EINVAL;
2294
2295 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2296 vcpu->guest_debug = dbg->control;
2297 /* enforce guest PER */
2298 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2299
2300 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2301 rc = kvm_s390_import_bp_data(vcpu, dbg);
2302 } else {
2303 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2304 vcpu->arch.guestdbg.last_bp = 0;
2305 }
2306
2307 if (rc) {
2308 vcpu->guest_debug = 0;
2309 kvm_s390_clear_bp_data(vcpu);
2310 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2311 }
2312
2313 return rc;
2314 }
2315
2316 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2317 struct kvm_mp_state *mp_state)
2318 {
2319 /* CHECK_STOP and LOAD are not supported yet */
2320 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2321 KVM_MP_STATE_OPERATING;
2322 }
2323
2324 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2325 struct kvm_mp_state *mp_state)
2326 {
2327 int rc = 0;
2328
2329 /* user space knows about this interface - let it control the state */
2330 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2331
2332 switch (mp_state->mp_state) {
2333 case KVM_MP_STATE_STOPPED:
2334 kvm_s390_vcpu_stop(vcpu);
2335 break;
2336 case KVM_MP_STATE_OPERATING:
2337 kvm_s390_vcpu_start(vcpu);
2338 break;
2339 case KVM_MP_STATE_LOAD:
2340 case KVM_MP_STATE_CHECK_STOP:
2341 /* fall through - CHECK_STOP and LOAD are not supported yet */
2342 default:
2343 rc = -ENXIO;
2344 }
2345
2346 return rc;
2347 }
2348
2349 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2350 {
2351 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2352 }
2353
2354 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2355 {
2356 retry:
2357 kvm_s390_vcpu_request_handled(vcpu);
2358 if (!vcpu->requests)
2359 return 0;
2360 /*
2361 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2362 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2363 * This ensures that the ipte instruction for this request has
2364 * already finished. We might race against a second unmapper that
2365 * wants to set the blocking bit. Lets just retry the request loop.
2366 */
2367 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2368 int rc;
2369 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2370 kvm_s390_get_prefix(vcpu),
2371 PAGE_SIZE * 2, PROT_WRITE);
2372 if (rc) {
2373 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2374 return rc;
2375 }
2376 goto retry;
2377 }
2378
2379 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2380 vcpu->arch.sie_block->ihcpu = 0xffff;
2381 goto retry;
2382 }
2383
2384 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2385 if (!ibs_enabled(vcpu)) {
2386 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2387 atomic_or(CPUSTAT_IBS,
2388 &vcpu->arch.sie_block->cpuflags);
2389 }
2390 goto retry;
2391 }
2392
2393 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2394 if (ibs_enabled(vcpu)) {
2395 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2396 atomic_andnot(CPUSTAT_IBS,
2397 &vcpu->arch.sie_block->cpuflags);
2398 }
2399 goto retry;
2400 }
2401
2402 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2403 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2404 goto retry;
2405 }
2406
2407 /* nothing to do, just clear the request */
2408 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2409
2410 return 0;
2411 }
2412
2413 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2414 {
2415 struct kvm_vcpu *vcpu;
2416 int i;
2417
2418 mutex_lock(&kvm->lock);
2419 preempt_disable();
2420 kvm->arch.epoch = tod - get_tod_clock();
2421 kvm_s390_vcpu_block_all(kvm);
2422 kvm_for_each_vcpu(i, vcpu, kvm)
2423 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2424 kvm_s390_vcpu_unblock_all(kvm);
2425 preempt_enable();
2426 mutex_unlock(&kvm->lock);
2427 }
2428
2429 /**
2430 * kvm_arch_fault_in_page - fault-in guest page if necessary
2431 * @vcpu: The corresponding virtual cpu
2432 * @gpa: Guest physical address
2433 * @writable: Whether the page should be writable or not
2434 *
2435 * Make sure that a guest page has been faulted-in on the host.
2436 *
2437 * Return: Zero on success, negative error code otherwise.
2438 */
2439 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2440 {
2441 return gmap_fault(vcpu->arch.gmap, gpa,
2442 writable ? FAULT_FLAG_WRITE : 0);
2443 }
2444
2445 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2446 unsigned long token)
2447 {
2448 struct kvm_s390_interrupt inti;
2449 struct kvm_s390_irq irq;
2450
2451 if (start_token) {
2452 irq.u.ext.ext_params2 = token;
2453 irq.type = KVM_S390_INT_PFAULT_INIT;
2454 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2455 } else {
2456 inti.type = KVM_S390_INT_PFAULT_DONE;
2457 inti.parm64 = token;
2458 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2459 }
2460 }
2461
2462 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2463 struct kvm_async_pf *work)
2464 {
2465 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2466 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2467 }
2468
2469 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2470 struct kvm_async_pf *work)
2471 {
2472 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2473 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2474 }
2475
/*
 * Intentionally empty: s390 will always inject the page directly, so there
 * is nothing left to do once the page is ready.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
2481
2482 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2483 {
2484 /*
2485 * s390 will always inject the page directly,
2486 * but we still want check_async_completion to cleanup
2487 */
2488 return true;
2489 }
2490
2491 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2492 {
2493 hva_t hva;
2494 struct kvm_arch_async_pf arch;
2495 int rc;
2496
2497 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2498 return 0;
2499 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2500 vcpu->arch.pfault_compare)
2501 return 0;
2502 if (psw_extint_disabled(vcpu))
2503 return 0;
2504 if (kvm_s390_vcpu_has_irq(vcpu, 0))
2505 return 0;
2506 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2507 return 0;
2508 if (!vcpu->arch.gmap->pfault_enabled)
2509 return 0;
2510
2511 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2512 hva += current->thread.gmap_addr & ~PAGE_MASK;
2513 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2514 return 0;
2515
2516 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2517 return rc;
2518 }
2519
2520 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2521 {
2522 int rc, cpuflags;
2523
2524 /*
2525 * On s390 notifications for arriving pages will be delivered directly
2526 * to the guest but the house keeping for completed pfaults is
2527 * handled outside the worker.
2528 */
2529 kvm_check_async_pf_completion(vcpu);
2530
2531 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2532 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2533
2534 if (need_resched())
2535 schedule();
2536
2537 if (test_cpu_flag(CIF_MCCK_PENDING))
2538 s390_handle_mcck();
2539
2540 if (!kvm_is_ucontrol(vcpu->kvm)) {
2541 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2542 if (rc)
2543 return rc;
2544 }
2545
2546 rc = kvm_s390_handle_requests(vcpu);
2547 if (rc)
2548 return rc;
2549
2550 if (guestdbg_enabled(vcpu)) {
2551 kvm_s390_backup_guest_per_regs(vcpu);
2552 kvm_s390_patch_guest_per_regs(vcpu);
2553 }
2554
2555 vcpu->arch.sie_block->icptcode = 0;
2556 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2557 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2558 trace_kvm_s390_sie_enter(vcpu, cpuflags);
2559
2560 return 0;
2561 }
2562
2563 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2564 {
2565 struct kvm_s390_pgm_info pgm_info = {
2566 .code = PGM_ADDRESSING,
2567 };
2568 u8 opcode, ilen;
2569 int rc;
2570
2571 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2572 trace_kvm_s390_sie_fault(vcpu);
2573
2574 /*
2575 * We want to inject an addressing exception, which is defined as a
2576 * suppressing or terminating exception. However, since we came here
2577 * by a DAT access exception, the PSW still points to the faulting
2578 * instruction since DAT exceptions are nullifying. So we've got
2579 * to look up the current opcode to get the length of the instruction
2580 * to be able to forward the PSW.
2581 */
2582 rc = read_guest_instr(vcpu, &opcode, 1);
2583 ilen = insn_length(opcode);
2584 if (rc < 0) {
2585 return rc;
2586 } else if (rc) {
2587 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2588 * Forward by arbitrary ilc, injection will take care of
2589 * nullification if necessary.
2590 */
2591 pgm_info = vcpu->arch.pgm;
2592 ilen = 4;
2593 }
2594 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2595 kvm_s390_forward_psw(vcpu, ilen);
2596 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2597 }
2598
2599 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2600 {
2601 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2602 vcpu->arch.sie_block->icptcode);
2603 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2604
2605 if (guestdbg_enabled(vcpu))
2606 kvm_s390_restore_guest_per_regs(vcpu);
2607
2608 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2609 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2610
2611 if (vcpu->arch.sie_block->icptcode > 0) {
2612 int rc = kvm_handle_sie_intercept(vcpu);
2613
2614 if (rc != -EOPNOTSUPP)
2615 return rc;
2616 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2617 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2618 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2619 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2620 return -EREMOTE;
2621 } else if (exit_reason != -EFAULT) {
2622 vcpu->stat.exit_null++;
2623 return 0;
2624 } else if (kvm_is_ucontrol(vcpu->kvm)) {
2625 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2626 vcpu->run->s390_ucontrol.trans_exc_code =
2627 current->thread.gmap_addr;
2628 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2629 return -EREMOTE;
2630 } else if (current->thread.gmap_pfault) {
2631 trace_kvm_s390_major_guest_pfault(vcpu);
2632 current->thread.gmap_pfault = 0;
2633 if (kvm_arch_setup_async_pf(vcpu))
2634 return 0;
2635 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2636 }
2637 return vcpu_post_run_fault_in_sie(vcpu);
2638 }
2639
2640 static int __vcpu_run(struct kvm_vcpu *vcpu)
2641 {
2642 int rc, exit_reason;
2643
2644 /*
2645 * We try to hold kvm->srcu during most of vcpu_run (except when run-
2646 * ning the guest), so that memslots (and other stuff) are protected
2647 */
2648 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2649
2650 do {
2651 rc = vcpu_pre_run(vcpu);
2652 if (rc)
2653 break;
2654
2655 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2656 /*
2657 * As PF_VCPU will be used in fault handler, between
2658 * guest_enter and guest_exit should be no uaccess.
2659 */
2660 local_irq_disable();
2661 guest_enter_irqoff();
2662 __disable_cpu_timer_accounting(vcpu);
2663 local_irq_enable();
2664 exit_reason = sie64a(vcpu->arch.sie_block,
2665 vcpu->run->s.regs.gprs);
2666 local_irq_disable();
2667 __enable_cpu_timer_accounting(vcpu);
2668 guest_exit_irqoff();
2669 local_irq_enable();
2670 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2671
2672 rc = vcpu_post_run(vcpu, exit_reason);
2673 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2674
2675 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2676 return rc;
2677 }
2678
2679 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2680 {
2681 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2682 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2683 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2684 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2685 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2686 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2687 /* some control register changes require a tlb flush */
2688 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2689 }
2690 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2691 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2692 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2693 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2694 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2695 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2696 }
2697 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2698 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2699 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2700 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2701 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2702 kvm_clear_async_pf_completion_queue(vcpu);
2703 }
2704 /*
2705 * If userspace sets the riccb (e.g. after migration) to a valid state,
2706 * we should enable RI here instead of doing the lazy enablement.
2707 */
2708 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2709 test_kvm_facility(vcpu->kvm, 64)) {
2710 struct runtime_instr_cb *riccb =
2711 (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2712
2713 if (riccb->valid)
2714 vcpu->arch.sie_block->ecb3 |= 0x01;
2715 }
2716 save_access_regs(vcpu->arch.host_acrs);
2717 restore_access_regs(vcpu->run->s.regs.acrs);
2718 /* save host (userspace) fprs/vrs */
2719 save_fpu_regs();
2720 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
2721 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
2722 if (MACHINE_HAS_VX)
2723 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
2724 else
2725 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
2726 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
2727 if (test_fp_ctl(current->thread.fpu.fpc))
2728 /* User space provided an invalid FPC, let's clear it */
2729 current->thread.fpu.fpc = 0;
2730
2731 kvm_run->kvm_dirty_regs = 0;
2732 }
2733
2734 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2735 {
2736 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2737 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2738 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2739 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2740 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2741 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2742 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2743 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2744 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2745 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2746 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2747 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2748 save_access_regs(vcpu->run->s.regs.acrs);
2749 restore_access_regs(vcpu->arch.host_acrs);
2750 /* Save guest register state */
2751 save_fpu_regs();
2752 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2753 /* Restore will be done lazily at return */
2754 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
2755 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
2756
2757 }
2758
2759 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2760 {
2761 int rc;
2762 sigset_t sigsaved;
2763
2764 if (guestdbg_exit_pending(vcpu)) {
2765 kvm_s390_prepare_debug_exit(vcpu);
2766 return 0;
2767 }
2768
2769 if (vcpu->sigset_active)
2770 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2771
2772 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2773 kvm_s390_vcpu_start(vcpu);
2774 } else if (is_vcpu_stopped(vcpu)) {
2775 pr_err_ratelimited("can't run stopped vcpu %d\n",
2776 vcpu->vcpu_id);
2777 return -EINVAL;
2778 }
2779
2780 sync_regs(vcpu, kvm_run);
2781 enable_cpu_timer_accounting(vcpu);
2782
2783 might_fault();
2784 rc = __vcpu_run(vcpu);
2785
2786 if (signal_pending(current) && !rc) {
2787 kvm_run->exit_reason = KVM_EXIT_INTR;
2788 rc = -EINTR;
2789 }
2790
2791 if (guestdbg_exit_pending(vcpu) && !rc) {
2792 kvm_s390_prepare_debug_exit(vcpu);
2793 rc = 0;
2794 }
2795
2796 if (rc == -EREMOTE) {
2797 /* userspace support is needed, kvm_run has been prepared */
2798 rc = 0;
2799 }
2800
2801 disable_cpu_timer_accounting(vcpu);
2802 store_regs(vcpu, kvm_run);
2803
2804 if (vcpu->sigset_active)
2805 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2806
2807 vcpu->stat.exit_userspace++;
2808 return rc;
2809 }
2810
2811 /*
2812 * store status at address
2813 * we use have two special cases:
2814 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2815 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2816 */
2817 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2818 {
2819 unsigned char archmode = 1;
2820 freg_t fprs[NUM_FPRS];
2821 unsigned int px;
2822 u64 clkcomp, cputm;
2823 int rc;
2824
2825 px = kvm_s390_get_prefix(vcpu);
2826 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2827 if (write_guest_abs(vcpu, 163, &archmode, 1))
2828 return -EFAULT;
2829 gpa = 0;
2830 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2831 if (write_guest_real(vcpu, 163, &archmode, 1))
2832 return -EFAULT;
2833 gpa = px;
2834 } else
2835 gpa -= __LC_FPREGS_SAVE_AREA;
2836
2837 /* manually convert vector registers if necessary */
2838 if (MACHINE_HAS_VX) {
2839 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2840 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2841 fprs, 128);
2842 } else {
2843 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2844 vcpu->run->s.regs.fprs, 128);
2845 }
2846 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2847 vcpu->run->s.regs.gprs, 128);
2848 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2849 &vcpu->arch.sie_block->gpsw, 16);
2850 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2851 &px, 4);
2852 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2853 &vcpu->run->s.regs.fpc, 4);
2854 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2855 &vcpu->arch.sie_block->todpr, 4);
2856 cputm = kvm_s390_get_cpu_timer(vcpu);
2857 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2858 &cputm, 8);
2859 clkcomp = vcpu->arch.sie_block->ckc >> 8;
2860 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2861 &clkcomp, 8);
2862 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2863 &vcpu->run->s.regs.acrs, 64);
2864 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2865 &vcpu->arch.sie_block->gcr, 128);
2866 return rc ? -EFAULT : 0;
2867 }
2868
2869 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2870 {
2871 /*
2872 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2873 * switch in the run ioctl. Let's update our copies before we save
2874 * it into the save area
2875 */
2876 save_fpu_regs();
2877 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2878 save_access_regs(vcpu->run->s.regs.acrs);
2879
2880 return kvm_s390_store_status_unloaded(vcpu, addr);
2881 }
2882
2883 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2884 {
2885 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2886 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2887 }
2888
2889 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2890 {
2891 unsigned int i;
2892 struct kvm_vcpu *vcpu;
2893
2894 kvm_for_each_vcpu(i, vcpu, kvm) {
2895 __disable_ibs_on_vcpu(vcpu);
2896 }
2897 }
2898
2899 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2900 {
2901 if (!sclp.has_ibs)
2902 return;
2903 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2904 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2905 }
2906
2907 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2908 {
2909 int i, online_vcpus, started_vcpus = 0;
2910
2911 if (!is_vcpu_stopped(vcpu))
2912 return;
2913
2914 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2915 /* Only one cpu at a time may enter/leave the STOPPED state. */
2916 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2917 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2918
2919 for (i = 0; i < online_vcpus; i++) {
2920 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2921 started_vcpus++;
2922 }
2923
2924 if (started_vcpus == 0) {
2925 /* we're the only active VCPU -> speed it up */
2926 __enable_ibs_on_vcpu(vcpu);
2927 } else if (started_vcpus == 1) {
2928 /*
2929 * As we are starting a second VCPU, we have to disable
2930 * the IBS facility on all VCPUs to remove potentially
2931 * oustanding ENABLE requests.
2932 */
2933 __disable_ibs_on_all_vcpus(vcpu->kvm);
2934 }
2935
2936 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2937 /*
2938 * Another VCPU might have used IBS while we were offline.
2939 * Let's play safe and flush the VCPU at startup.
2940 */
2941 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2942 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2943 return;
2944 }
2945
2946 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2947 {
2948 int i, online_vcpus, started_vcpus = 0;
2949 struct kvm_vcpu *started_vcpu = NULL;
2950
2951 if (is_vcpu_stopped(vcpu))
2952 return;
2953
2954 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2955 /* Only one cpu at a time may enter/leave the STOPPED state. */
2956 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2957 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2958
2959 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2960 kvm_s390_clear_stop_irq(vcpu);
2961
2962 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2963 __disable_ibs_on_vcpu(vcpu);
2964
2965 for (i = 0; i < online_vcpus; i++) {
2966 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2967 started_vcpus++;
2968 started_vcpu = vcpu->kvm->vcpus[i];
2969 }
2970 }
2971
2972 if (started_vcpus == 1) {
2973 /*
2974 * As we only have one VCPU left, we want to enable the
2975 * IBS facility for that VCPU to speed it up.
2976 */
2977 __enable_ibs_on_vcpu(started_vcpu);
2978 }
2979
2980 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2981 return;
2982 }
2983
2984 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2985 struct kvm_enable_cap *cap)
2986 {
2987 int r;
2988
2989 if (cap->flags)
2990 return -EINVAL;
2991
2992 switch (cap->cap) {
2993 case KVM_CAP_S390_CSS_SUPPORT:
2994 if (!vcpu->kvm->arch.css_support) {
2995 vcpu->kvm->arch.css_support = 1;
2996 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2997 trace_kvm_s390_enable_css(vcpu->kvm);
2998 }
2999 r = 0;
3000 break;
3001 default:
3002 r = -EINVAL;
3003 break;
3004 }
3005 return r;
3006 }
3007
3008 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3009 struct kvm_s390_mem_op *mop)
3010 {
3011 void __user *uaddr = (void __user *)mop->buf;
3012 void *tmpbuf = NULL;
3013 int r, srcu_idx;
3014 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3015 | KVM_S390_MEMOP_F_CHECK_ONLY;
3016
3017 if (mop->flags & ~supported_flags)
3018 return -EINVAL;
3019
3020 if (mop->size > MEM_OP_MAX_SIZE)
3021 return -E2BIG;
3022
3023 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3024 tmpbuf = vmalloc(mop->size);
3025 if (!tmpbuf)
3026 return -ENOMEM;
3027 }
3028
3029 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3030
3031 switch (mop->op) {
3032 case KVM_S390_MEMOP_LOGICAL_READ:
3033 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3034 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3035 mop->size, GACC_FETCH);
3036 break;
3037 }
3038 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3039 if (r == 0) {
3040 if (copy_to_user(uaddr, tmpbuf, mop->size))
3041 r = -EFAULT;
3042 }
3043 break;
3044 case KVM_S390_MEMOP_LOGICAL_WRITE:
3045 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3046 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3047 mop->size, GACC_STORE);
3048 break;
3049 }
3050 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3051 r = -EFAULT;
3052 break;
3053 }
3054 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3055 break;
3056 default:
3057 r = -EINVAL;
3058 }
3059
3060 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3061
3062 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3063 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3064
3065 vfree(tmpbuf);
3066 return r;
3067 }
3068
3069 long kvm_arch_vcpu_ioctl(struct file *filp,
3070 unsigned int ioctl, unsigned long arg)
3071 {
3072 struct kvm_vcpu *vcpu = filp->private_data;
3073 void __user *argp = (void __user *)arg;
3074 int idx;
3075 long r;
3076
3077 switch (ioctl) {
3078 case KVM_S390_IRQ: {
3079 struct kvm_s390_irq s390irq;
3080
3081 r = -EFAULT;
3082 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3083 break;
3084 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3085 break;
3086 }
3087 case KVM_S390_INTERRUPT: {
3088 struct kvm_s390_interrupt s390int;
3089 struct kvm_s390_irq s390irq;
3090
3091 r = -EFAULT;
3092 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3093 break;
3094 if (s390int_to_s390irq(&s390int, &s390irq))
3095 return -EINVAL;
3096 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3097 break;
3098 }
3099 case KVM_S390_STORE_STATUS:
3100 idx = srcu_read_lock(&vcpu->kvm->srcu);
3101 r = kvm_s390_vcpu_store_status(vcpu, arg);
3102 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3103 break;
3104 case KVM_S390_SET_INITIAL_PSW: {
3105 psw_t psw;
3106
3107 r = -EFAULT;
3108 if (copy_from_user(&psw, argp, sizeof(psw)))
3109 break;
3110 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3111 break;
3112 }
3113 case KVM_S390_INITIAL_RESET:
3114 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3115 break;
3116 case KVM_SET_ONE_REG:
3117 case KVM_GET_ONE_REG: {
3118 struct kvm_one_reg reg;
3119 r = -EFAULT;
3120 if (copy_from_user(&reg, argp, sizeof(reg)))
3121 break;
3122 if (ioctl == KVM_SET_ONE_REG)
3123 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3124 else
3125 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3126 break;
3127 }
3128 #ifdef CONFIG_KVM_S390_UCONTROL
3129 case KVM_S390_UCAS_MAP: {
3130 struct kvm_s390_ucas_mapping ucasmap;
3131
3132 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3133 r = -EFAULT;
3134 break;
3135 }
3136
3137 if (!kvm_is_ucontrol(vcpu->kvm)) {
3138 r = -EINVAL;
3139 break;
3140 }
3141
3142 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3143 ucasmap.vcpu_addr, ucasmap.length);
3144 break;
3145 }
3146 case KVM_S390_UCAS_UNMAP: {
3147 struct kvm_s390_ucas_mapping ucasmap;
3148
3149 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3150 r = -EFAULT;
3151 break;
3152 }
3153
3154 if (!kvm_is_ucontrol(vcpu->kvm)) {
3155 r = -EINVAL;
3156 break;
3157 }
3158
3159 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3160 ucasmap.length);
3161 break;
3162 }
3163 #endif
3164 case KVM_S390_VCPU_FAULT: {
3165 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3166 break;
3167 }
3168 case KVM_ENABLE_CAP:
3169 {
3170 struct kvm_enable_cap cap;
3171 r = -EFAULT;
3172 if (copy_from_user(&cap, argp, sizeof(cap)))
3173 break;
3174 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3175 break;
3176 }
3177 case KVM_S390_MEM_OP: {
3178 struct kvm_s390_mem_op mem_op;
3179
3180 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3181 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3182 else
3183 r = -EFAULT;
3184 break;
3185 }
3186 case KVM_S390_SET_IRQ_STATE: {
3187 struct kvm_s390_irq_state irq_state;
3188
3189 r = -EFAULT;
3190 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3191 break;
3192 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3193 irq_state.len == 0 ||
3194 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3195 r = -EINVAL;
3196 break;
3197 }
3198 r = kvm_s390_set_irq_state(vcpu,
3199 (void __user *) irq_state.buf,
3200 irq_state.len);
3201 break;
3202 }
3203 case KVM_S390_GET_IRQ_STATE: {
3204 struct kvm_s390_irq_state irq_state;
3205
3206 r = -EFAULT;
3207 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3208 break;
3209 if (irq_state.len == 0) {
3210 r = -EINVAL;
3211 break;
3212 }
3213 r = kvm_s390_get_irq_state(vcpu,
3214 (__u8 __user *) irq_state.buf,
3215 irq_state.len);
3216 break;
3217 }
3218 default:
3219 r = -ENOTTY;
3220 }
3221 return r;
3222 }
3223
3224 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3225 {
3226 #ifdef CONFIG_KVM_S390_UCONTROL
3227 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3228 && (kvm_is_ucontrol(vcpu->kvm))) {
3229 vmf->page = virt_to_page(vcpu->arch.sie_block);
3230 get_page(vmf->page);
3231 return 0;
3232 }
3233 #endif
3234 return VM_FAULT_SIGBUS;
3235 }
3236
/*
 * Architecture hook for memslot creation.
 *
 * This implementation does nothing with its arguments and always
 * reports success (0).
 */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
3242
3243 /* Section: memory related */
3244 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3245 struct kvm_memory_slot *memslot,
3246 const struct kvm_userspace_memory_region *mem,
3247 enum kvm_mr_change change)
3248 {
3249 /* A few sanity checks. We can have memory slots which have to be
3250 located/ended at a segment boundary (1MB). The memory in userland is
3251 ok to be fragmented into various different vmas. It is okay to mmap()
3252 and munmap() stuff in this slot after doing this call at any time */
3253
3254 if (mem->userspace_addr & 0xffffful)
3255 return -EINVAL;
3256
3257 if (mem->memory_size & 0xffffful)
3258 return -EINVAL;
3259
3260 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3261 return -EINVAL;
3262
3263 return 0;
3264 }
3265
3266 void kvm_arch_commit_memory_region(struct kvm *kvm,
3267 const struct kvm_userspace_memory_region *mem,
3268 const struct kvm_memory_slot *old,
3269 const struct kvm_memory_slot *new,
3270 enum kvm_mr_change change)
3271 {
3272 int rc;
3273
3274 /* If the basics of the memslot do not change, we do not want
3275 * to update the gmap. Every update causes several unnecessary
3276 * segment translation exceptions. This is usually handled just
3277 * fine by the normal fault handler + gmap, but it will also
3278 * cause faults on the prefix page of running guest CPUs.
3279 */
3280 if (old->userspace_addr == mem->userspace_addr &&
3281 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3282 old->npages * PAGE_SIZE == mem->memory_size)
3283 return;
3284
3285 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3286 mem->guest_phys_addr, mem->memory_size);
3287 if (rc)
3288 pr_warn("failed to commit memory region\n");
3289 return;
3290 }
3291
3292 static inline unsigned long nonhyp_mask(int i)
3293 {
3294 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3295
3296 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3297 }
3298
3299 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3300 {
3301 vcpu->valid_wakeup = false;
3302 }
3303
3304 static int __init kvm_s390_init(void)
3305 {
3306 int i;
3307
3308 if (!sclp.has_sief2) {
3309 pr_info("SIE not available\n");
3310 return -ENODEV;
3311 }
3312
3313 for (i = 0; i < 16; i++)
3314 kvm_s390_fac_list_mask[i] |=
3315 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3316
3317 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3318 }
3319
3320 static void __exit kvm_s390_exit(void)
3321 {
3322 kvm_exit();
3323 }
3324
3325 module_init(kvm_s390_init);
3326 module_exit(kvm_s390_exit);
3327
3328 /*
3329 * Enable autoloading of the kvm module.
3330 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3331 * since x86 takes a different approach.
3332 */
3333 #include <linux/miscdevice.h>
3334 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3335 MODULE_ALIAS("devname:kvm");