]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame_incremental - arch/s390/kvm/kvm-s390.c
Merge tag 'kvm-s390-master-4.10-1' of git://git.kernel.org/pub/scm/linux/kernel/git...
[mirror_ubuntu-jammy-kernel.git] / arch / s390 / kvm / kvm-s390.c
... / ...
CommitLineData
1/*
2 * hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008, 2009
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
15 */
16
17#include <linux/compiler.h>
18#include <linux/err.h>
19#include <linux/fs.h>
20#include <linux/hrtimer.h>
21#include <linux/init.h>
22#include <linux/kvm.h>
23#include <linux/kvm_host.h>
24#include <linux/mman.h>
25#include <linux/module.h>
26#include <linux/random.h>
27#include <linux/slab.h>
28#include <linux/timer.h>
29#include <linux/vmalloc.h>
30#include <linux/bitmap.h>
31#include <asm/asm-offsets.h>
32#include <asm/lowcore.h>
33#include <asm/stp.h>
34#include <asm/pgtable.h>
35#include <asm/gmap.h>
36#include <asm/nmi.h>
37#include <asm/switch_to.h>
38#include <asm/isc.h>
39#include <asm/sclp.h>
40#include <asm/cpacf.h>
41#include <asm/timex.h>
42#include "kvm-s390.h"
43#include "gaccess.h"
44
45#define KMSG_COMPONENT "kvm-s390"
46#undef pr_fmt
47#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
48
49#define CREATE_TRACE_POINTS
50#include "trace.h"
51#include "trace-s390.h"
52
/* Largest transfer size accepted by KVM_S390_MEM_OP, in bytes. */
#define MEM_OP_MAX_SIZE 65536
#define LOCAL_IRQS 32
/* Room for (KVM_MAX_VCPUS + LOCAL_IRQS) struct kvm_s390_irq entries. */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

/*
 * Expands to TWO initializer values (offset of the counter inside
 * struct kvm_vcpu, followed by KVM_STAT_VCPU) for debugfs_entries[].
 */
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
59
60struct kvm_stats_debugfs_item debugfs_entries[] = {
61 { "userspace_handled", VCPU_STAT(exit_userspace) },
62 { "exit_null", VCPU_STAT(exit_null) },
63 { "exit_validity", VCPU_STAT(exit_validity) },
64 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
65 { "exit_external_request", VCPU_STAT(exit_external_request) },
66 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
67 { "exit_instruction", VCPU_STAT(exit_instruction) },
68 { "exit_pei", VCPU_STAT(exit_pei) },
69 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
70 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
71 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
72 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
73 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
74 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
75 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
76 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
77 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
78 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
79 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
80 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
81 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
82 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
83 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
84 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
85 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
86 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
87 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
88 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
89 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
90 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
91 { "instruction_spx", VCPU_STAT(instruction_spx) },
92 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
93 { "instruction_stap", VCPU_STAT(instruction_stap) },
94 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
95 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
96 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
97 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
98 { "instruction_essa", VCPU_STAT(instruction_essa) },
99 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
100 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
101 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
102 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
103 { "instruction_sie", VCPU_STAT(instruction_sie) },
104 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
105 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
106 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
107 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
108 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
109 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
110 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
111 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
112 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
113 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
114 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
115 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
116 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
117 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
118 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
119 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
120 { "diagnose_10", VCPU_STAT(diagnose_10) },
121 { "diagnose_44", VCPU_STAT(diagnose_44) },
122 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
123 { "diagnose_258", VCPU_STAT(diagnose_258) },
124 { "diagnose_308", VCPU_STAT(diagnose_308) },
125 { "diagnose_500", VCPU_STAT(diagnose_500) },
126 { NULL }
127};
128
129/* allow nested virtualization in KVM (if enabled by user space) */
130static int nested;
131module_param(nested, int, S_IRUGO);
132MODULE_PARM_DESC(nested, "Nested virtualization support");
133
134/* upper facilities limit for kvm */
135unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
136
137unsigned long kvm_s390_fac_list_mask_size(void)
138{
139 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
140 return ARRAY_SIZE(kvm_s390_fac_list_mask);
141}
142
143/* available cpu features supported by kvm */
144static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
145/* available subfunctions indicated via query / "test bit" */
146static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
147
148static struct gmap_notifier gmap_notifier;
149static struct gmap_notifier vsie_gmap_notifier;
150debug_info_t *kvm_s390_dbf;
151
/* Section: not file related */

/*
 * Nothing to switch on: every s390 is virtualization enabled ;-)
 * Always returns 0.
 */
int kvm_arch_hardware_enable(void)
{
	return 0;
}
158
/* Forward declaration; installed as callback in kvm_arch_hardware_setup(). */
static void kvm_gmap_notifier(struct gmap *gmap,
			      unsigned long start, unsigned long end);
161
162/*
163 * This callback is executed during stop_machine(). All CPUs are therefore
164 * temporarily stopped. In order not to change guest behavior, we have to
165 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
166 * so a CPU won't be stopped while calculating with the epoch.
167 */
168static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
169 void *v)
170{
171 struct kvm *kvm;
172 struct kvm_vcpu *vcpu;
173 int i;
174 unsigned long long *delta = v;
175
176 list_for_each_entry(kvm, &vm_list, vm_list) {
177 kvm->arch.epoch -= *delta;
178 kvm_for_each_vcpu(i, vcpu, kvm) {
179 vcpu->arch.sie_block->epoch -= *delta;
180 if (vcpu->arch.cputm_enabled)
181 vcpu->arch.cputm_start += *delta;
182 if (vcpu->arch.vsie_block)
183 vcpu->arch.vsie_block->epoch -= *delta;
184 }
185 }
186 return NOTIFY_OK;
187}
188
189static struct notifier_block kvm_clock_notifier = {
190 .notifier_call = kvm_clock_sync,
191};
192
193int kvm_arch_hardware_setup(void)
194{
195 gmap_notifier.notifier_call = kvm_gmap_notifier;
196 gmap_register_pte_notifier(&gmap_notifier);
197 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
198 gmap_register_pte_notifier(&vsie_gmap_notifier);
199 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
200 &kvm_clock_notifier);
201 return 0;
202}
203
204void kvm_arch_hardware_unsetup(void)
205{
206 gmap_unregister_pte_notifier(&gmap_notifier);
207 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
208 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
209 &kvm_clock_notifier);
210}
211
212static void allow_cpu_feat(unsigned long nr)
213{
214 set_bit_inv(nr, kvm_s390_available_cpu_feat);
215}
216
217static inline int plo_test_bit(unsigned char nr)
218{
219 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
220 int cc;
221
222 asm volatile(
223 /* Parameter registers are ignored for "test bit" */
224 " plo 0,0,0,0(0)\n"
225 " ipm %0\n"
226 " srl %0,28\n"
227 : "=d" (cc)
228 : "d" (r0)
229 : "cc");
230 return cc == 0;
231}
232
233static void kvm_s390_cpu_feat_init(void)
234{
235 int i;
236
237 for (i = 0; i < 256; ++i) {
238 if (plo_test_bit(i))
239 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
240 }
241
242 if (test_facility(28)) /* TOD-clock steering */
243 ptff(kvm_s390_available_subfunc.ptff,
244 sizeof(kvm_s390_available_subfunc.ptff),
245 PTFF_QAF);
246
247 if (test_facility(17)) { /* MSA */
248 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
249 kvm_s390_available_subfunc.kmac);
250 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
251 kvm_s390_available_subfunc.kmc);
252 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
253 kvm_s390_available_subfunc.km);
254 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
255 kvm_s390_available_subfunc.kimd);
256 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
257 kvm_s390_available_subfunc.klmd);
258 }
259 if (test_facility(76)) /* MSA3 */
260 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
261 kvm_s390_available_subfunc.pckmo);
262 if (test_facility(77)) { /* MSA4 */
263 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
264 kvm_s390_available_subfunc.kmctr);
265 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
266 kvm_s390_available_subfunc.kmf);
267 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
268 kvm_s390_available_subfunc.kmo);
269 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
270 kvm_s390_available_subfunc.pcc);
271 }
272 if (test_facility(57)) /* MSA5 */
273 __cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
274 kvm_s390_available_subfunc.ppno);
275
276 if (MACHINE_HAS_ESOP)
277 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
278 /*
279 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
280 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
281 */
282 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
283 !test_facility(3) || !nested)
284 return;
285 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
286 if (sclp.has_64bscao)
287 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
288 if (sclp.has_siif)
289 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
290 if (sclp.has_gpere)
291 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
292 if (sclp.has_gsls)
293 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
294 if (sclp.has_ib)
295 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
296 if (sclp.has_cei)
297 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
298 if (sclp.has_ibs)
299 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
300 /*
301 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
302 * all skey handling functions read/set the skey from the PGSTE
303 * instead of the real storage key.
304 *
305 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
306 * pages being detected as preserved although they are resident.
307 *
308 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
309 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
310 *
311 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
312 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
313 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
314 *
315 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
316 * cannot easily shadow the SCA because of the ipte lock.
317 */
318}
319
320int kvm_arch_init(void *opaque)
321{
322 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
323 if (!kvm_s390_dbf)
324 return -ENOMEM;
325
326 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
327 debug_unregister(kvm_s390_dbf);
328 return -ENOMEM;
329 }
330
331 kvm_s390_cpu_feat_init();
332
333 /* Register floating interrupt controller interface. */
334 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
335}
336
337void kvm_arch_exit(void)
338{
339 debug_unregister(kvm_s390_dbf);
340}
341
342/* Section: device related */
343long kvm_arch_dev_ioctl(struct file *filp,
344 unsigned int ioctl, unsigned long arg)
345{
346 if (ioctl == KVM_S390_ENABLE_SIE)
347 return s390_enable_sie();
348 return -EINVAL;
349}
350
351int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
352{
353 int r;
354
355 switch (ext) {
356 case KVM_CAP_S390_PSW:
357 case KVM_CAP_S390_GMAP:
358 case KVM_CAP_SYNC_MMU:
359#ifdef CONFIG_KVM_S390_UCONTROL
360 case KVM_CAP_S390_UCONTROL:
361#endif
362 case KVM_CAP_ASYNC_PF:
363 case KVM_CAP_SYNC_REGS:
364 case KVM_CAP_ONE_REG:
365 case KVM_CAP_ENABLE_CAP:
366 case KVM_CAP_S390_CSS_SUPPORT:
367 case KVM_CAP_IOEVENTFD:
368 case KVM_CAP_DEVICE_CTRL:
369 case KVM_CAP_ENABLE_CAP_VM:
370 case KVM_CAP_S390_IRQCHIP:
371 case KVM_CAP_VM_ATTRIBUTES:
372 case KVM_CAP_MP_STATE:
373 case KVM_CAP_S390_INJECT_IRQ:
374 case KVM_CAP_S390_USER_SIGP:
375 case KVM_CAP_S390_USER_STSI:
376 case KVM_CAP_S390_SKEYS:
377 case KVM_CAP_S390_IRQ_STATE:
378 case KVM_CAP_S390_USER_INSTR0:
379 r = 1;
380 break;
381 case KVM_CAP_S390_MEM_OP:
382 r = MEM_OP_MAX_SIZE;
383 break;
384 case KVM_CAP_NR_VCPUS:
385 case KVM_CAP_MAX_VCPUS:
386 r = KVM_S390_BSCA_CPU_SLOTS;
387 if (!kvm_s390_use_sca_entries())
388 r = KVM_MAX_VCPUS;
389 else if (sclp.has_esca && sclp.has_64bscao)
390 r = KVM_S390_ESCA_CPU_SLOTS;
391 break;
392 case KVM_CAP_NR_MEMSLOTS:
393 r = KVM_USER_MEM_SLOTS;
394 break;
395 case KVM_CAP_S390_COW:
396 r = MACHINE_HAS_ESOP;
397 break;
398 case KVM_CAP_S390_VECTOR_REGISTERS:
399 r = MACHINE_HAS_VX;
400 break;
401 case KVM_CAP_S390_RI:
402 r = test_facility(64);
403 break;
404 default:
405 r = 0;
406 }
407 return r;
408}
409
410static void kvm_s390_sync_dirty_log(struct kvm *kvm,
411 struct kvm_memory_slot *memslot)
412{
413 gfn_t cur_gfn, last_gfn;
414 unsigned long address;
415 struct gmap *gmap = kvm->arch.gmap;
416
417 /* Loop over all guest pages */
418 last_gfn = memslot->base_gfn + memslot->npages;
419 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
420 address = gfn_to_hva_memslot(memslot, cur_gfn);
421
422 if (test_and_clear_guest_dirty(gmap->mm, address))
423 mark_page_dirty(kvm, cur_gfn);
424 if (fatal_signal_pending(current))
425 return;
426 cond_resched();
427 }
428}
429
/* Section: vm related */

/* Forward declaration; the definition is further down in this file. */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
432
433/*
434 * Get (and clear) the dirty memory log for a memory slot.
435 */
436int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
437 struct kvm_dirty_log *log)
438{
439 int r;
440 unsigned long n;
441 struct kvm_memslots *slots;
442 struct kvm_memory_slot *memslot;
443 int is_dirty = 0;
444
445 mutex_lock(&kvm->slots_lock);
446
447 r = -EINVAL;
448 if (log->slot >= KVM_USER_MEM_SLOTS)
449 goto out;
450
451 slots = kvm_memslots(kvm);
452 memslot = id_to_memslot(slots, log->slot);
453 r = -ENOENT;
454 if (!memslot->dirty_bitmap)
455 goto out;
456
457 kvm_s390_sync_dirty_log(kvm, memslot);
458 r = kvm_get_dirty_log(kvm, log, &is_dirty);
459 if (r)
460 goto out;
461
462 /* Clear the dirty log */
463 if (is_dirty) {
464 n = kvm_dirty_bitmap_bytes(memslot);
465 memset(memslot->dirty_bitmap, 0, n);
466 }
467 r = 0;
468out:
469 mutex_unlock(&kvm->slots_lock);
470 return r;
471}
472
473static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
474{
475 unsigned int i;
476 struct kvm_vcpu *vcpu;
477
478 kvm_for_each_vcpu(i, vcpu, kvm) {
479 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
480 }
481}
482
483static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
484{
485 int r;
486
487 if (cap->flags)
488 return -EINVAL;
489
490 switch (cap->cap) {
491 case KVM_CAP_S390_IRQCHIP:
492 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
493 kvm->arch.use_irqchip = 1;
494 r = 0;
495 break;
496 case KVM_CAP_S390_USER_SIGP:
497 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
498 kvm->arch.user_sigp = 1;
499 r = 0;
500 break;
501 case KVM_CAP_S390_VECTOR_REGISTERS:
502 mutex_lock(&kvm->lock);
503 if (kvm->created_vcpus) {
504 r = -EBUSY;
505 } else if (MACHINE_HAS_VX) {
506 set_kvm_facility(kvm->arch.model.fac_mask, 129);
507 set_kvm_facility(kvm->arch.model.fac_list, 129);
508 if (test_facility(134)) {
509 set_kvm_facility(kvm->arch.model.fac_mask, 134);
510 set_kvm_facility(kvm->arch.model.fac_list, 134);
511 }
512 if (test_facility(135)) {
513 set_kvm_facility(kvm->arch.model.fac_mask, 135);
514 set_kvm_facility(kvm->arch.model.fac_list, 135);
515 }
516 r = 0;
517 } else
518 r = -EINVAL;
519 mutex_unlock(&kvm->lock);
520 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
521 r ? "(not available)" : "(success)");
522 break;
523 case KVM_CAP_S390_RI:
524 r = -EINVAL;
525 mutex_lock(&kvm->lock);
526 if (kvm->created_vcpus) {
527 r = -EBUSY;
528 } else if (test_facility(64)) {
529 set_kvm_facility(kvm->arch.model.fac_mask, 64);
530 set_kvm_facility(kvm->arch.model.fac_list, 64);
531 r = 0;
532 }
533 mutex_unlock(&kvm->lock);
534 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
535 r ? "(not available)" : "(success)");
536 break;
537 case KVM_CAP_S390_USER_STSI:
538 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
539 kvm->arch.user_stsi = 1;
540 r = 0;
541 break;
542 case KVM_CAP_S390_USER_INSTR0:
543 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
544 kvm->arch.user_instr0 = 1;
545 icpt_operexc_on_all_vcpus(kvm);
546 r = 0;
547 break;
548 default:
549 r = -EINVAL;
550 break;
551 }
552 return r;
553}
554
555static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
556{
557 int ret;
558
559 switch (attr->attr) {
560 case KVM_S390_VM_MEM_LIMIT_SIZE:
561 ret = 0;
562 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
563 kvm->arch.mem_limit);
564 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
565 ret = -EFAULT;
566 break;
567 default:
568 ret = -ENXIO;
569 break;
570 }
571 return ret;
572}
573
574static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
575{
576 int ret;
577 unsigned int idx;
578 switch (attr->attr) {
579 case KVM_S390_VM_MEM_ENABLE_CMMA:
580 ret = -ENXIO;
581 if (!sclp.has_cmma)
582 break;
583
584 ret = -EBUSY;
585 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
586 mutex_lock(&kvm->lock);
587 if (!kvm->created_vcpus) {
588 kvm->arch.use_cmma = 1;
589 ret = 0;
590 }
591 mutex_unlock(&kvm->lock);
592 break;
593 case KVM_S390_VM_MEM_CLR_CMMA:
594 ret = -ENXIO;
595 if (!sclp.has_cmma)
596 break;
597 ret = -EINVAL;
598 if (!kvm->arch.use_cmma)
599 break;
600
601 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
602 mutex_lock(&kvm->lock);
603 idx = srcu_read_lock(&kvm->srcu);
604 s390_reset_cmma(kvm->arch.gmap->mm);
605 srcu_read_unlock(&kvm->srcu, idx);
606 mutex_unlock(&kvm->lock);
607 ret = 0;
608 break;
609 case KVM_S390_VM_MEM_LIMIT_SIZE: {
610 unsigned long new_limit;
611
612 if (kvm_is_ucontrol(kvm))
613 return -EINVAL;
614
615 if (get_user(new_limit, (u64 __user *)attr->addr))
616 return -EFAULT;
617
618 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
619 new_limit > kvm->arch.mem_limit)
620 return -E2BIG;
621
622 if (!new_limit)
623 return -EINVAL;
624
625 /* gmap_create takes last usable address */
626 if (new_limit != KVM_S390_NO_MEM_LIMIT)
627 new_limit -= 1;
628
629 ret = -EBUSY;
630 mutex_lock(&kvm->lock);
631 if (!kvm->created_vcpus) {
632 /* gmap_create will round the limit up */
633 struct gmap *new = gmap_create(current->mm, new_limit);
634
635 if (!new) {
636 ret = -ENOMEM;
637 } else {
638 gmap_remove(kvm->arch.gmap);
639 new->private = kvm;
640 kvm->arch.gmap = new;
641 ret = 0;
642 }
643 }
644 mutex_unlock(&kvm->lock);
645 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
646 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
647 (void *) kvm->arch.gmap->asce);
648 break;
649 }
650 default:
651 ret = -ENXIO;
652 break;
653 }
654 return ret;
655}
656
/* Forward declaration; used by kvm_s390_vm_set_crypto() below. */
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
658
659static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
660{
661 struct kvm_vcpu *vcpu;
662 int i;
663
664 if (!test_kvm_facility(kvm, 76))
665 return -EINVAL;
666
667 mutex_lock(&kvm->lock);
668 switch (attr->attr) {
669 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
670 get_random_bytes(
671 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
672 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
673 kvm->arch.crypto.aes_kw = 1;
674 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
675 break;
676 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
677 get_random_bytes(
678 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
679 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
680 kvm->arch.crypto.dea_kw = 1;
681 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
682 break;
683 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
684 kvm->arch.crypto.aes_kw = 0;
685 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
686 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
687 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
688 break;
689 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
690 kvm->arch.crypto.dea_kw = 0;
691 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
692 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
693 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
694 break;
695 default:
696 mutex_unlock(&kvm->lock);
697 return -ENXIO;
698 }
699
700 kvm_for_each_vcpu(i, vcpu, kvm) {
701 kvm_s390_vcpu_crypto_setup(vcpu);
702 exit_sie(vcpu);
703 }
704 mutex_unlock(&kvm->lock);
705 return 0;
706}
707
708static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
709{
710 u8 gtod_high;
711
712 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
713 sizeof(gtod_high)))
714 return -EFAULT;
715
716 if (gtod_high != 0)
717 return -EINVAL;
718 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
719
720 return 0;
721}
722
723static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
724{
725 u64 gtod;
726
727 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
728 return -EFAULT;
729
730 kvm_s390_set_tod_clock(kvm, gtod);
731 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
732 return 0;
733}
734
735static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
736{
737 int ret;
738
739 if (attr->flags)
740 return -EINVAL;
741
742 switch (attr->attr) {
743 case KVM_S390_VM_TOD_HIGH:
744 ret = kvm_s390_set_tod_high(kvm, attr);
745 break;
746 case KVM_S390_VM_TOD_LOW:
747 ret = kvm_s390_set_tod_low(kvm, attr);
748 break;
749 default:
750 ret = -ENXIO;
751 break;
752 }
753 return ret;
754}
755
756static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
757{
758 u8 gtod_high = 0;
759
760 if (copy_to_user((void __user *)attr->addr, &gtod_high,
761 sizeof(gtod_high)))
762 return -EFAULT;
763 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
764
765 return 0;
766}
767
768static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
769{
770 u64 gtod;
771
772 gtod = kvm_s390_get_tod_clock_fast(kvm);
773 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
774 return -EFAULT;
775 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
776
777 return 0;
778}
779
780static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
781{
782 int ret;
783
784 if (attr->flags)
785 return -EINVAL;
786
787 switch (attr->attr) {
788 case KVM_S390_VM_TOD_HIGH:
789 ret = kvm_s390_get_tod_high(kvm, attr);
790 break;
791 case KVM_S390_VM_TOD_LOW:
792 ret = kvm_s390_get_tod_low(kvm, attr);
793 break;
794 default:
795 ret = -ENXIO;
796 break;
797 }
798 return ret;
799}
800
801static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
802{
803 struct kvm_s390_vm_cpu_processor *proc;
804 u16 lowest_ibc, unblocked_ibc;
805 int ret = 0;
806
807 mutex_lock(&kvm->lock);
808 if (kvm->created_vcpus) {
809 ret = -EBUSY;
810 goto out;
811 }
812 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
813 if (!proc) {
814 ret = -ENOMEM;
815 goto out;
816 }
817 if (!copy_from_user(proc, (void __user *)attr->addr,
818 sizeof(*proc))) {
819 kvm->arch.model.cpuid = proc->cpuid;
820 lowest_ibc = sclp.ibc >> 16 & 0xfff;
821 unblocked_ibc = sclp.ibc & 0xfff;
822 if (lowest_ibc && proc->ibc) {
823 if (proc->ibc > unblocked_ibc)
824 kvm->arch.model.ibc = unblocked_ibc;
825 else if (proc->ibc < lowest_ibc)
826 kvm->arch.model.ibc = lowest_ibc;
827 else
828 kvm->arch.model.ibc = proc->ibc;
829 }
830 memcpy(kvm->arch.model.fac_list, proc->fac_list,
831 S390_ARCH_FAC_LIST_SIZE_BYTE);
832 } else
833 ret = -EFAULT;
834 kfree(proc);
835out:
836 mutex_unlock(&kvm->lock);
837 return ret;
838}
839
840static int kvm_s390_set_processor_feat(struct kvm *kvm,
841 struct kvm_device_attr *attr)
842{
843 struct kvm_s390_vm_cpu_feat data;
844 int ret = -EBUSY;
845
846 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
847 return -EFAULT;
848 if (!bitmap_subset((unsigned long *) data.feat,
849 kvm_s390_available_cpu_feat,
850 KVM_S390_VM_CPU_FEAT_NR_BITS))
851 return -EINVAL;
852
853 mutex_lock(&kvm->lock);
854 if (!atomic_read(&kvm->online_vcpus)) {
855 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
856 KVM_S390_VM_CPU_FEAT_NR_BITS);
857 ret = 0;
858 }
859 mutex_unlock(&kvm->lock);
860 return ret;
861}
862
/*
 * Configuring subfunctions is not supported; always returns -ENXIO.
 *
 * Once supported by kernel + hw, the subfunctions have to be stored in
 * kvm->arch together with the fact that user space configured them.
 */
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	const int not_supported = -ENXIO;

	return not_supported;
}
872
873static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
874{
875 int ret = -ENXIO;
876
877 switch (attr->attr) {
878 case KVM_S390_VM_CPU_PROCESSOR:
879 ret = kvm_s390_set_processor(kvm, attr);
880 break;
881 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
882 ret = kvm_s390_set_processor_feat(kvm, attr);
883 break;
884 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
885 ret = kvm_s390_set_processor_subfunc(kvm, attr);
886 break;
887 }
888 return ret;
889}
890
891static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
892{
893 struct kvm_s390_vm_cpu_processor *proc;
894 int ret = 0;
895
896 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
897 if (!proc) {
898 ret = -ENOMEM;
899 goto out;
900 }
901 proc->cpuid = kvm->arch.model.cpuid;
902 proc->ibc = kvm->arch.model.ibc;
903 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
904 S390_ARCH_FAC_LIST_SIZE_BYTE);
905 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
906 ret = -EFAULT;
907 kfree(proc);
908out:
909 return ret;
910}
911
912static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
913{
914 struct kvm_s390_vm_cpu_machine *mach;
915 int ret = 0;
916
917 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
918 if (!mach) {
919 ret = -ENOMEM;
920 goto out;
921 }
922 get_cpu_id((struct cpuid *) &mach->cpuid);
923 mach->ibc = sclp.ibc;
924 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
925 S390_ARCH_FAC_LIST_SIZE_BYTE);
926 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
927 sizeof(S390_lowcore.stfle_fac_list));
928 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
929 ret = -EFAULT;
930 kfree(mach);
931out:
932 return ret;
933}
934
935static int kvm_s390_get_processor_feat(struct kvm *kvm,
936 struct kvm_device_attr *attr)
937{
938 struct kvm_s390_vm_cpu_feat data;
939
940 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
941 KVM_S390_VM_CPU_FEAT_NR_BITS);
942 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
943 return -EFAULT;
944 return 0;
945}
946
947static int kvm_s390_get_machine_feat(struct kvm *kvm,
948 struct kvm_device_attr *attr)
949{
950 struct kvm_s390_vm_cpu_feat data;
951
952 bitmap_copy((unsigned long *) data.feat,
953 kvm_s390_available_cpu_feat,
954 KVM_S390_VM_CPU_FEAT_NR_BITS);
955 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
956 return -EFAULT;
957 return 0;
958}
959
/*
 * Querying configured subfunctions is not supported; always returns
 * -ENXIO.
 *
 * Once subfunctions can actually be configured (kernel + hw support),
 * this has to check whether user space already set them and, if so,
 * copy them from kvm->arch.
 */
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	const int not_supported = -ENXIO;

	return not_supported;
}
970
971static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
972 struct kvm_device_attr *attr)
973{
974 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
975 sizeof(struct kvm_s390_vm_cpu_subfunc)))
976 return -EFAULT;
977 return 0;
978}
979static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
980{
981 int ret = -ENXIO;
982
983 switch (attr->attr) {
984 case KVM_S390_VM_CPU_PROCESSOR:
985 ret = kvm_s390_get_processor(kvm, attr);
986 break;
987 case KVM_S390_VM_CPU_MACHINE:
988 ret = kvm_s390_get_machine(kvm, attr);
989 break;
990 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
991 ret = kvm_s390_get_processor_feat(kvm, attr);
992 break;
993 case KVM_S390_VM_CPU_MACHINE_FEAT:
994 ret = kvm_s390_get_machine_feat(kvm, attr);
995 break;
996 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
997 ret = kvm_s390_get_processor_subfunc(kvm, attr);
998 break;
999 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1000 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1001 break;
1002 }
1003 return ret;
1004}
1005
1006static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1007{
1008 int ret;
1009
1010 switch (attr->group) {
1011 case KVM_S390_VM_MEM_CTRL:
1012 ret = kvm_s390_set_mem_control(kvm, attr);
1013 break;
1014 case KVM_S390_VM_TOD:
1015 ret = kvm_s390_set_tod(kvm, attr);
1016 break;
1017 case KVM_S390_VM_CPU_MODEL:
1018 ret = kvm_s390_set_cpu_model(kvm, attr);
1019 break;
1020 case KVM_S390_VM_CRYPTO:
1021 ret = kvm_s390_vm_set_crypto(kvm, attr);
1022 break;
1023 default:
1024 ret = -ENXIO;
1025 break;
1026 }
1027
1028 return ret;
1029}
1030
1031static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1032{
1033 int ret;
1034
1035 switch (attr->group) {
1036 case KVM_S390_VM_MEM_CTRL:
1037 ret = kvm_s390_get_mem_control(kvm, attr);
1038 break;
1039 case KVM_S390_VM_TOD:
1040 ret = kvm_s390_get_tod(kvm, attr);
1041 break;
1042 case KVM_S390_VM_CPU_MODEL:
1043 ret = kvm_s390_get_cpu_model(kvm, attr);
1044 break;
1045 default:
1046 ret = -ENXIO;
1047 break;
1048 }
1049
1050 return ret;
1051}
1052
1053static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1054{
1055 int ret;
1056
1057 switch (attr->group) {
1058 case KVM_S390_VM_MEM_CTRL:
1059 switch (attr->attr) {
1060 case KVM_S390_VM_MEM_ENABLE_CMMA:
1061 case KVM_S390_VM_MEM_CLR_CMMA:
1062 ret = sclp.has_cmma ? 0 : -ENXIO;
1063 break;
1064 case KVM_S390_VM_MEM_LIMIT_SIZE:
1065 ret = 0;
1066 break;
1067 default:
1068 ret = -ENXIO;
1069 break;
1070 }
1071 break;
1072 case KVM_S390_VM_TOD:
1073 switch (attr->attr) {
1074 case KVM_S390_VM_TOD_LOW:
1075 case KVM_S390_VM_TOD_HIGH:
1076 ret = 0;
1077 break;
1078 default:
1079 ret = -ENXIO;
1080 break;
1081 }
1082 break;
1083 case KVM_S390_VM_CPU_MODEL:
1084 switch (attr->attr) {
1085 case KVM_S390_VM_CPU_PROCESSOR:
1086 case KVM_S390_VM_CPU_MACHINE:
1087 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1088 case KVM_S390_VM_CPU_MACHINE_FEAT:
1089 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1090 ret = 0;
1091 break;
1092 /* configuring subfunctions is not supported yet */
1093 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1094 default:
1095 ret = -ENXIO;
1096 break;
1097 }
1098 break;
1099 case KVM_S390_VM_CRYPTO:
1100 switch (attr->attr) {
1101 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1102 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1103 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1104 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1105 ret = 0;
1106 break;
1107 default:
1108 ret = -ENXIO;
1109 break;
1110 }
1111 break;
1112 default:
1113 ret = -ENXIO;
1114 break;
1115 }
1116
1117 return ret;
1118}
1119
1120static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1121{
1122 uint8_t *keys;
1123 uint64_t hva;
1124 int i, r = 0;
1125
1126 if (args->flags != 0)
1127 return -EINVAL;
1128
1129 /* Is this guest using storage keys? */
1130 if (!mm_use_skey(current->mm))
1131 return KVM_S390_GET_SKEYS_NONE;
1132
1133 /* Enforce sane limit on memory allocation */
1134 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1135 return -EINVAL;
1136
1137 keys = kmalloc_array(args->count, sizeof(uint8_t),
1138 GFP_KERNEL | __GFP_NOWARN);
1139 if (!keys)
1140 keys = vmalloc(sizeof(uint8_t) * args->count);
1141 if (!keys)
1142 return -ENOMEM;
1143
1144 down_read(&current->mm->mmap_sem);
1145 for (i = 0; i < args->count; i++) {
1146 hva = gfn_to_hva(kvm, args->start_gfn + i);
1147 if (kvm_is_error_hva(hva)) {
1148 r = -EFAULT;
1149 break;
1150 }
1151
1152 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1153 if (r)
1154 break;
1155 }
1156 up_read(&current->mm->mmap_sem);
1157
1158 if (!r) {
1159 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1160 sizeof(uint8_t) * args->count);
1161 if (r)
1162 r = -EFAULT;
1163 }
1164
1165 kvfree(keys);
1166 return r;
1167}
1168
1169static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1170{
1171 uint8_t *keys;
1172 uint64_t hva;
1173 int i, r = 0;
1174
1175 if (args->flags != 0)
1176 return -EINVAL;
1177
1178 /* Enforce sane limit on memory allocation */
1179 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1180 return -EINVAL;
1181
1182 keys = kmalloc_array(args->count, sizeof(uint8_t),
1183 GFP_KERNEL | __GFP_NOWARN);
1184 if (!keys)
1185 keys = vmalloc(sizeof(uint8_t) * args->count);
1186 if (!keys)
1187 return -ENOMEM;
1188
1189 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1190 sizeof(uint8_t) * args->count);
1191 if (r) {
1192 r = -EFAULT;
1193 goto out;
1194 }
1195
1196 /* Enable storage key handling for the guest */
1197 r = s390_enable_skey();
1198 if (r)
1199 goto out;
1200
1201 down_read(&current->mm->mmap_sem);
1202 for (i = 0; i < args->count; i++) {
1203 hva = gfn_to_hva(kvm, args->start_gfn + i);
1204 if (kvm_is_error_hva(hva)) {
1205 r = -EFAULT;
1206 break;
1207 }
1208
1209 /* Lowest order bit is reserved */
1210 if (keys[i] & 0x01) {
1211 r = -EINVAL;
1212 break;
1213 }
1214
1215 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1216 if (r)
1217 break;
1218 }
1219 up_read(&current->mm->mmap_sem);
1220out:
1221 kvfree(keys);
1222 return r;
1223}
1224
1225long kvm_arch_vm_ioctl(struct file *filp,
1226 unsigned int ioctl, unsigned long arg)
1227{
1228 struct kvm *kvm = filp->private_data;
1229 void __user *argp = (void __user *)arg;
1230 struct kvm_device_attr attr;
1231 int r;
1232
1233 switch (ioctl) {
1234 case KVM_S390_INTERRUPT: {
1235 struct kvm_s390_interrupt s390int;
1236
1237 r = -EFAULT;
1238 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1239 break;
1240 r = kvm_s390_inject_vm(kvm, &s390int);
1241 break;
1242 }
1243 case KVM_ENABLE_CAP: {
1244 struct kvm_enable_cap cap;
1245 r = -EFAULT;
1246 if (copy_from_user(&cap, argp, sizeof(cap)))
1247 break;
1248 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1249 break;
1250 }
1251 case KVM_CREATE_IRQCHIP: {
1252 struct kvm_irq_routing_entry routing;
1253
1254 r = -EINVAL;
1255 if (kvm->arch.use_irqchip) {
1256 /* Set up dummy routing. */
1257 memset(&routing, 0, sizeof(routing));
1258 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1259 }
1260 break;
1261 }
1262 case KVM_SET_DEVICE_ATTR: {
1263 r = -EFAULT;
1264 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1265 break;
1266 r = kvm_s390_vm_set_attr(kvm, &attr);
1267 break;
1268 }
1269 case KVM_GET_DEVICE_ATTR: {
1270 r = -EFAULT;
1271 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1272 break;
1273 r = kvm_s390_vm_get_attr(kvm, &attr);
1274 break;
1275 }
1276 case KVM_HAS_DEVICE_ATTR: {
1277 r = -EFAULT;
1278 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1279 break;
1280 r = kvm_s390_vm_has_attr(kvm, &attr);
1281 break;
1282 }
1283 case KVM_S390_GET_SKEYS: {
1284 struct kvm_s390_skeys args;
1285
1286 r = -EFAULT;
1287 if (copy_from_user(&args, argp,
1288 sizeof(struct kvm_s390_skeys)))
1289 break;
1290 r = kvm_s390_get_skeys(kvm, &args);
1291 break;
1292 }
1293 case KVM_S390_SET_SKEYS: {
1294 struct kvm_s390_skeys args;
1295
1296 r = -EFAULT;
1297 if (copy_from_user(&args, argp,
1298 sizeof(struct kvm_s390_skeys)))
1299 break;
1300 r = kvm_s390_set_skeys(kvm, &args);
1301 break;
1302 }
1303 default:
1304 r = -ENOTTY;
1305 }
1306
1307 return r;
1308}
1309
1310static int kvm_s390_query_ap_config(u8 *config)
1311{
1312 u32 fcn_code = 0x04000000UL;
1313 u32 cc = 0;
1314
1315 memset(config, 0, 128);
1316 asm volatile(
1317 "lgr 0,%1\n"
1318 "lgr 2,%2\n"
1319 ".long 0xb2af0000\n" /* PQAP(QCI) */
1320 "0: ipm %0\n"
1321 "srl %0,28\n"
1322 "1:\n"
1323 EX_TABLE(0b, 1b)
1324 : "+r" (cc)
1325 : "r" (fcn_code), "r" (config)
1326 : "cc", "0", "2", "memory"
1327 );
1328
1329 return cc;
1330}
1331
1332static int kvm_s390_apxa_installed(void)
1333{
1334 u8 config[128];
1335 int cc;
1336
1337 if (test_facility(12)) {
1338 cc = kvm_s390_query_ap_config(config);
1339
1340 if (cc)
1341 pr_err("PQAP(QCI) failed with cc=%d", cc);
1342 else
1343 return config[0] & 0x40;
1344 }
1345
1346 return 0;
1347}
1348
1349static void kvm_s390_set_crycb_format(struct kvm *kvm)
1350{
1351 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1352
1353 if (kvm_s390_apxa_installed())
1354 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1355 else
1356 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1357}
1358
1359static u64 kvm_s390_get_initial_cpuid(void)
1360{
1361 struct cpuid cpuid;
1362
1363 get_cpu_id(&cpuid);
1364 cpuid.version = 0xff;
1365 return *((u64 *) &cpuid);
1366}
1367
1368static void kvm_s390_crypto_init(struct kvm *kvm)
1369{
1370 if (!test_kvm_facility(kvm, 76))
1371 return;
1372
1373 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1374 kvm_s390_set_crycb_format(kvm);
1375
1376 /* Enable AES/DEA protected key functions by default */
1377 kvm->arch.crypto.aes_kw = 1;
1378 kvm->arch.crypto.dea_kw = 1;
1379 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1380 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1381 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1382 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1383}
1384
1385static void sca_dispose(struct kvm *kvm)
1386{
1387 if (kvm->arch.use_esca)
1388 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1389 else
1390 free_page((unsigned long)(kvm->arch.sca));
1391 kvm->arch.sca = NULL;
1392}
1393
1394int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1395{
1396 gfp_t alloc_flags = GFP_KERNEL;
1397 int i, rc;
1398 char debug_name[16];
1399 static unsigned long sca_offset;
1400
1401 rc = -EINVAL;
1402#ifdef CONFIG_KVM_S390_UCONTROL
1403 if (type & ~KVM_VM_S390_UCONTROL)
1404 goto out_err;
1405 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1406 goto out_err;
1407#else
1408 if (type)
1409 goto out_err;
1410#endif
1411
1412 rc = s390_enable_sie();
1413 if (rc)
1414 goto out_err;
1415
1416 rc = -ENOMEM;
1417
1418 ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1419
1420 kvm->arch.use_esca = 0; /* start with basic SCA */
1421 if (!sclp.has_64bscao)
1422 alloc_flags |= GFP_DMA;
1423 rwlock_init(&kvm->arch.sca_lock);
1424 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1425 if (!kvm->arch.sca)
1426 goto out_err;
1427 spin_lock(&kvm_lock);
1428 sca_offset += 16;
1429 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1430 sca_offset = 0;
1431 kvm->arch.sca = (struct bsca_block *)
1432 ((char *) kvm->arch.sca + sca_offset);
1433 spin_unlock(&kvm_lock);
1434
1435 sprintf(debug_name, "kvm-%u", current->pid);
1436
1437 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1438 if (!kvm->arch.dbf)
1439 goto out_err;
1440
1441 kvm->arch.sie_page2 =
1442 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1443 if (!kvm->arch.sie_page2)
1444 goto out_err;
1445
1446 /* Populate the facility mask initially. */
1447 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1448 sizeof(S390_lowcore.stfle_fac_list));
1449 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1450 if (i < kvm_s390_fac_list_mask_size())
1451 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1452 else
1453 kvm->arch.model.fac_mask[i] = 0UL;
1454 }
1455
1456 /* Populate the facility list initially. */
1457 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1458 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1459 S390_ARCH_FAC_LIST_SIZE_BYTE);
1460
1461 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1462 set_kvm_facility(kvm->arch.model.fac_list, 74);
1463
1464 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1465 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1466
1467 kvm_s390_crypto_init(kvm);
1468
1469 spin_lock_init(&kvm->arch.float_int.lock);
1470 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1471 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1472 init_waitqueue_head(&kvm->arch.ipte_wq);
1473 mutex_init(&kvm->arch.ipte_mutex);
1474
1475 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1476 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1477
1478 if (type & KVM_VM_S390_UCONTROL) {
1479 kvm->arch.gmap = NULL;
1480 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1481 } else {
1482 if (sclp.hamax == U64_MAX)
1483 kvm->arch.mem_limit = TASK_MAX_SIZE;
1484 else
1485 kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1486 sclp.hamax + 1);
1487 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1488 if (!kvm->arch.gmap)
1489 goto out_err;
1490 kvm->arch.gmap->private = kvm;
1491 kvm->arch.gmap->pfault_enabled = 0;
1492 }
1493
1494 kvm->arch.css_support = 0;
1495 kvm->arch.use_irqchip = 0;
1496 kvm->arch.epoch = 0;
1497
1498 spin_lock_init(&kvm->arch.start_stop_lock);
1499 kvm_s390_vsie_init(kvm);
1500 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1501
1502 return 0;
1503out_err:
1504 free_page((unsigned long)kvm->arch.sie_page2);
1505 debug_unregister(kvm->arch.dbf);
1506 sca_dispose(kvm);
1507 KVM_EVENT(3, "creation of vm failed: %d", rc);
1508 return rc;
1509}
1510
/* This architecture provides no per-vcpu debugfs entries. */
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}
1515
/* Nothing to create for @vcpu; always reports success (0). */
int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}
1520
1521void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1522{
1523 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1524 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1525 kvm_s390_clear_local_irqs(vcpu);
1526 kvm_clear_async_pf_completion_queue(vcpu);
1527 if (!kvm_is_ucontrol(vcpu->kvm))
1528 sca_del_vcpu(vcpu);
1529
1530 if (kvm_is_ucontrol(vcpu->kvm))
1531 gmap_remove(vcpu->arch.gmap);
1532
1533 if (vcpu->kvm->arch.use_cmma)
1534 kvm_s390_vcpu_unsetup_cmma(vcpu);
1535 free_page((unsigned long)(vcpu->arch.sie_block));
1536
1537 kvm_vcpu_uninit(vcpu);
1538 kmem_cache_free(kvm_vcpu_cache, vcpu);
1539}
1540
1541static void kvm_free_vcpus(struct kvm *kvm)
1542{
1543 unsigned int i;
1544 struct kvm_vcpu *vcpu;
1545
1546 kvm_for_each_vcpu(i, vcpu, kvm)
1547 kvm_arch_vcpu_destroy(vcpu);
1548
1549 mutex_lock(&kvm->lock);
1550 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1551 kvm->vcpus[i] = NULL;
1552
1553 atomic_set(&kvm->online_vcpus, 0);
1554 mutex_unlock(&kvm->lock);
1555}
1556
1557void kvm_arch_destroy_vm(struct kvm *kvm)
1558{
1559 kvm_free_vcpus(kvm);
1560 sca_dispose(kvm);
1561 debug_unregister(kvm->arch.dbf);
1562 free_page((unsigned long)kvm->arch.sie_page2);
1563 if (!kvm_is_ucontrol(kvm))
1564 gmap_remove(kvm->arch.gmap);
1565 kvm_s390_destroy_adapters(kvm);
1566 kvm_s390_clear_float_irqs(kvm);
1567 kvm_s390_vsie_destroy(kvm);
1568 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1569}
1570
1571/* Section: vcpu related */
1572static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1573{
1574 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1575 if (!vcpu->arch.gmap)
1576 return -ENOMEM;
1577 vcpu->arch.gmap->private = vcpu->kvm;
1578
1579 return 0;
1580}
1581
1582static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1583{
1584 if (!kvm_s390_use_sca_entries())
1585 return;
1586 read_lock(&vcpu->kvm->arch.sca_lock);
1587 if (vcpu->kvm->arch.use_esca) {
1588 struct esca_block *sca = vcpu->kvm->arch.sca;
1589
1590 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1591 sca->cpu[vcpu->vcpu_id].sda = 0;
1592 } else {
1593 struct bsca_block *sca = vcpu->kvm->arch.sca;
1594
1595 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1596 sca->cpu[vcpu->vcpu_id].sda = 0;
1597 }
1598 read_unlock(&vcpu->kvm->arch.sca_lock);
1599}
1600
1601static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1602{
1603 if (!kvm_s390_use_sca_entries()) {
1604 struct bsca_block *sca = vcpu->kvm->arch.sca;
1605
1606 /* we still need the basic sca for the ipte control */
1607 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1608 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1609 }
1610 read_lock(&vcpu->kvm->arch.sca_lock);
1611 if (vcpu->kvm->arch.use_esca) {
1612 struct esca_block *sca = vcpu->kvm->arch.sca;
1613
1614 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1615 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1616 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1617 vcpu->arch.sie_block->ecb2 |= 0x04U;
1618 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1619 } else {
1620 struct bsca_block *sca = vcpu->kvm->arch.sca;
1621
1622 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1623 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1624 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1625 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1626 }
1627 read_unlock(&vcpu->kvm->arch.sca_lock);
1628}
1629
1630/* Basic SCA to Extended SCA data copy routines */
1631static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1632{
1633 d->sda = s->sda;
1634 d->sigp_ctrl.c = s->sigp_ctrl.c;
1635 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1636}
1637
1638static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1639{
1640 int i;
1641
1642 d->ipte_control = s->ipte_control;
1643 d->mcn[0] = s->mcn;
1644 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1645 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1646}
1647
1648static int sca_switch_to_extended(struct kvm *kvm)
1649{
1650 struct bsca_block *old_sca = kvm->arch.sca;
1651 struct esca_block *new_sca;
1652 struct kvm_vcpu *vcpu;
1653 unsigned int vcpu_idx;
1654 u32 scaol, scaoh;
1655
1656 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1657 if (!new_sca)
1658 return -ENOMEM;
1659
1660 scaoh = (u32)((u64)(new_sca) >> 32);
1661 scaol = (u32)(u64)(new_sca) & ~0x3fU;
1662
1663 kvm_s390_vcpu_block_all(kvm);
1664 write_lock(&kvm->arch.sca_lock);
1665
1666 sca_copy_b_to_e(new_sca, old_sca);
1667
1668 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1669 vcpu->arch.sie_block->scaoh = scaoh;
1670 vcpu->arch.sie_block->scaol = scaol;
1671 vcpu->arch.sie_block->ecb2 |= 0x04U;
1672 }
1673 kvm->arch.sca = new_sca;
1674 kvm->arch.use_esca = 1;
1675
1676 write_unlock(&kvm->arch.sca_lock);
1677 kvm_s390_vcpu_unblock_all(kvm);
1678
1679 free_page((unsigned long)old_sca);
1680
1681 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1682 old_sca, kvm->arch.sca);
1683 return 0;
1684}
1685
1686static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1687{
1688 int rc;
1689
1690 if (!kvm_s390_use_sca_entries()) {
1691 if (id < KVM_MAX_VCPUS)
1692 return true;
1693 return false;
1694 }
1695 if (id < KVM_S390_BSCA_CPU_SLOTS)
1696 return true;
1697 if (!sclp.has_esca || !sclp.has_64bscao)
1698 return false;
1699
1700 mutex_lock(&kvm->lock);
1701 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1702 mutex_unlock(&kvm->lock);
1703
1704 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1705}
1706
1707int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1708{
1709 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1710 kvm_clear_async_pf_completion_queue(vcpu);
1711 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1712 KVM_SYNC_GPRS |
1713 KVM_SYNC_ACRS |
1714 KVM_SYNC_CRS |
1715 KVM_SYNC_ARCH0 |
1716 KVM_SYNC_PFAULT;
1717 kvm_s390_set_prefix(vcpu, 0);
1718 if (test_kvm_facility(vcpu->kvm, 64))
1719 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1720 /* fprs can be synchronized via vrs, even if the guest has no vx. With
1721 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1722 */
1723 if (MACHINE_HAS_VX)
1724 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1725 else
1726 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1727
1728 if (kvm_is_ucontrol(vcpu->kvm))
1729 return __kvm_ucontrol_vcpu_init(vcpu);
1730
1731 return 0;
1732}
1733
1734/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1735static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1736{
1737 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1738 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1739 vcpu->arch.cputm_start = get_tod_clock_fast();
1740 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1741}
1742
1743/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1744static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1745{
1746 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1747 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1748 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1749 vcpu->arch.cputm_start = 0;
1750 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1751}
1752
1753/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1754static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1755{
1756 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1757 vcpu->arch.cputm_enabled = true;
1758 __start_cpu_timer_accounting(vcpu);
1759}
1760
1761/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1762static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1763{
1764 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1765 __stop_cpu_timer_accounting(vcpu);
1766 vcpu->arch.cputm_enabled = false;
1767}
1768
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
1775
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
1782
1783/* set the cpu timer - may only be called from the VCPU thread itself */
1784void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1785{
1786 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1787 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1788 if (vcpu->arch.cputm_enabled)
1789 vcpu->arch.cputm_start = get_tod_clock_fast();
1790 vcpu->arch.sie_block->cputm = cputm;
1791 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1792 preempt_enable();
1793}
1794
1795/* update and get the cpu timer - can also be called from other VCPU threads */
1796__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1797{
1798 unsigned int seq;
1799 __u64 value;
1800
1801 if (unlikely(!vcpu->arch.cputm_enabled))
1802 return vcpu->arch.sie_block->cputm;
1803
1804 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1805 do {
1806 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1807 /*
1808 * If the writer would ever execute a read in the critical
1809 * section, e.g. in irq context, we have a deadlock.
1810 */
1811 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1812 value = vcpu->arch.sie_block->cputm;
1813 /* if cputm_start is 0, accounting is being started/stopped */
1814 if (likely(vcpu->arch.cputm_start))
1815 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1816 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1817 preempt_enable();
1818 return value;
1819}
1820
1821void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1822{
1823
1824 gmap_enable(vcpu->arch.enabled_gmap);
1825 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1826 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1827 __start_cpu_timer_accounting(vcpu);
1828 vcpu->cpu = cpu;
1829}
1830
1831void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1832{
1833 vcpu->cpu = -1;
1834 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1835 __stop_cpu_timer_accounting(vcpu);
1836 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1837 vcpu->arch.enabled_gmap = gmap_get_enabled();
1838 gmap_disable(vcpu->arch.enabled_gmap);
1839
1840}
1841
1842static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1843{
1844 /* this equals initial cpu reset in pop, but we don't switch to ESA */
1845 vcpu->arch.sie_block->gpsw.mask = 0UL;
1846 vcpu->arch.sie_block->gpsw.addr = 0UL;
1847 kvm_s390_set_prefix(vcpu, 0);
1848 kvm_s390_set_cpu_timer(vcpu, 0);
1849 vcpu->arch.sie_block->ckc = 0UL;
1850 vcpu->arch.sie_block->todpr = 0;
1851 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1852 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
1853 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1854 /* make sure the new fpc will be lazily loaded */
1855 save_fpu_regs();
1856 current->thread.fpu.fpc = 0;
1857 vcpu->arch.sie_block->gbea = 1;
1858 vcpu->arch.sie_block->pp = 0;
1859 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1860 kvm_clear_async_pf_completion_queue(vcpu);
1861 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1862 kvm_s390_vcpu_stop(vcpu);
1863 kvm_s390_clear_local_irqs(vcpu);
1864}
1865
1866void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1867{
1868 mutex_lock(&vcpu->kvm->lock);
1869 preempt_disable();
1870 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1871 preempt_enable();
1872 mutex_unlock(&vcpu->kvm->lock);
1873 if (!kvm_is_ucontrol(vcpu->kvm)) {
1874 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1875 sca_add_vcpu(vcpu);
1876 }
1877 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1878 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1879 /* make vcpu_load load the right gmap on the first trigger */
1880 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1881}
1882
1883static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1884{
1885 if (!test_kvm_facility(vcpu->kvm, 76))
1886 return;
1887
1888 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1889
1890 if (vcpu->kvm->arch.crypto.aes_kw)
1891 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1892 if (vcpu->kvm->arch.crypto.dea_kw)
1893 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1894
1895 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1896}
1897
1898void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1899{
1900 free_page(vcpu->arch.sie_block->cbrlo);
1901 vcpu->arch.sie_block->cbrlo = 0;
1902}
1903
1904int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1905{
1906 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1907 if (!vcpu->arch.sie_block->cbrlo)
1908 return -ENOMEM;
1909
1910 vcpu->arch.sie_block->ecb2 |= 0x80;
1911 vcpu->arch.sie_block->ecb2 &= ~0x08;
1912 return 0;
1913}
1914
1915static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1916{
1917 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1918
1919 vcpu->arch.sie_block->ibc = model->ibc;
1920 if (test_kvm_facility(vcpu->kvm, 7))
1921 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1922}
1923
1924int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1925{
1926 int rc = 0;
1927
1928 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1929 CPUSTAT_SM |
1930 CPUSTAT_STOPPED);
1931
1932 if (test_kvm_facility(vcpu->kvm, 78))
1933 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1934 else if (test_kvm_facility(vcpu->kvm, 8))
1935 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1936
1937 kvm_s390_vcpu_setup_model(vcpu);
1938
1939 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1940 if (MACHINE_HAS_ESOP)
1941 vcpu->arch.sie_block->ecb |= 0x02;
1942 if (test_kvm_facility(vcpu->kvm, 9))
1943 vcpu->arch.sie_block->ecb |= 0x04;
1944 if (test_kvm_facility(vcpu->kvm, 73))
1945 vcpu->arch.sie_block->ecb |= 0x10;
1946
1947 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1948 vcpu->arch.sie_block->ecb2 |= 0x08;
1949 if (test_kvm_facility(vcpu->kvm, 130))
1950 vcpu->arch.sie_block->ecb2 |= 0x20;
1951 vcpu->arch.sie_block->eca = 0x1002000U;
1952 if (sclp.has_cei)
1953 vcpu->arch.sie_block->eca |= 0x80000000U;
1954 if (sclp.has_ib)
1955 vcpu->arch.sie_block->eca |= 0x40000000U;
1956 if (sclp.has_siif)
1957 vcpu->arch.sie_block->eca |= 1;
1958 if (sclp.has_sigpif)
1959 vcpu->arch.sie_block->eca |= 0x10000000U;
1960 if (test_kvm_facility(vcpu->kvm, 129)) {
1961 vcpu->arch.sie_block->eca |= 0x00020000;
1962 vcpu->arch.sie_block->ecd |= 0x20000000;
1963 }
1964 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1965 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1966
1967 if (vcpu->kvm->arch.use_cmma) {
1968 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1969 if (rc)
1970 return rc;
1971 }
1972 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1973 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1974
1975 kvm_s390_vcpu_crypto_setup(vcpu);
1976
1977 return rc;
1978}
1979
1980struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1981 unsigned int id)
1982{
1983 struct kvm_vcpu *vcpu;
1984 struct sie_page *sie_page;
1985 int rc = -EINVAL;
1986
1987 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1988 goto out;
1989
1990 rc = -ENOMEM;
1991
1992 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1993 if (!vcpu)
1994 goto out;
1995
1996 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1997 if (!sie_page)
1998 goto out_free_cpu;
1999
2000 vcpu->arch.sie_block = &sie_page->sie_block;
2001 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2002
2003 /* the real guest size will always be smaller than msl */
2004 vcpu->arch.sie_block->mso = 0;
2005 vcpu->arch.sie_block->msl = sclp.hamax;
2006
2007 vcpu->arch.sie_block->icpua = id;
2008 spin_lock_init(&vcpu->arch.local_int.lock);
2009 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2010 vcpu->arch.local_int.wq = &vcpu->wq;
2011 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2012 seqcount_init(&vcpu->arch.cputm_seqcount);
2013
2014 rc = kvm_vcpu_init(vcpu, kvm, id);
2015 if (rc)
2016 goto out_free_sie_block;
2017 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2018 vcpu->arch.sie_block);
2019 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2020
2021 return vcpu;
2022out_free_sie_block:
2023 free_page((unsigned long)(vcpu->arch.sie_block));
2024out_free_cpu:
2025 kmem_cache_free(kvm_vcpu_cache, vcpu);
2026out:
2027 return ERR_PTR(rc);
2028}
2029
/* Returns what kvm_s390_vcpu_has_irq(vcpu, 0) reports for @vcpu. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}
2034
2035void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2036{
2037 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2038 exit_sie(vcpu);
2039}
2040
2041void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2042{
2043 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2044}
2045
2046static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2047{
2048 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2049 exit_sie(vcpu);
2050}
2051
2052static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2053{
2054 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2055}
2056
2057/*
2058 * Kick a guest cpu out of SIE and wait until SIE is not running.
2059 * If the CPU is not running (e.g. waiting as idle) the function will
2060 * return immediately. */
2061void exit_sie(struct kvm_vcpu *vcpu)
2062{
2063 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2064 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2065 cpu_relax();
2066}
2067
/*
 * Kick a guest cpu out of SIE to process a request synchronously: the
 * request @req is queued first, then the vcpu is forced out of SIE.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);

	kvm_s390_vcpu_request(vcpu);
}
2074
2075static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2076 unsigned long end)
2077{
2078 struct kvm *kvm = gmap->private;
2079 struct kvm_vcpu *vcpu;
2080 unsigned long prefix;
2081 int i;
2082
2083 if (gmap_is_shadow(gmap))
2084 return;
2085 if (start >= 1UL << 31)
2086 /* We are only interested in prefix pages */
2087 return;
2088 kvm_for_each_vcpu(i, vcpu, kvm) {
2089 /* match against both prefix pages */
2090 prefix = kvm_s390_get_prefix(vcpu);
2091 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2092 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2093 start, end);
2094 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2095 }
2096 }
2097}
2098
/*
 * Must never be reached on this architecture: kvm common code refers to
 * this function, but never calls it. Hitting it triggers BUG().
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();

	return 0;
}
2105
2106static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2107 struct kvm_one_reg *reg)
2108{
2109 int r = -EINVAL;
2110
2111 switch (reg->id) {
2112 case KVM_REG_S390_TODPR:
2113 r = put_user(vcpu->arch.sie_block->todpr,
2114 (u32 __user *)reg->addr);
2115 break;
2116 case KVM_REG_S390_EPOCHDIFF:
2117 r = put_user(vcpu->arch.sie_block->epoch,
2118 (u64 __user *)reg->addr);
2119 break;
2120 case KVM_REG_S390_CPU_TIMER:
2121 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2122 (u64 __user *)reg->addr);
2123 break;
2124 case KVM_REG_S390_CLOCK_COMP:
2125 r = put_user(vcpu->arch.sie_block->ckc,
2126 (u64 __user *)reg->addr);
2127 break;
2128 case KVM_REG_S390_PFTOKEN:
2129 r = put_user(vcpu->arch.pfault_token,
2130 (u64 __user *)reg->addr);
2131 break;
2132 case KVM_REG_S390_PFCOMPARE:
2133 r = put_user(vcpu->arch.pfault_compare,
2134 (u64 __user *)reg->addr);
2135 break;
2136 case KVM_REG_S390_PFSELECT:
2137 r = put_user(vcpu->arch.pfault_select,
2138 (u64 __user *)reg->addr);
2139 break;
2140 case KVM_REG_S390_PP:
2141 r = put_user(vcpu->arch.sie_block->pp,
2142 (u64 __user *)reg->addr);
2143 break;
2144 case KVM_REG_S390_GBEA:
2145 r = put_user(vcpu->arch.sie_block->gbea,
2146 (u64 __user *)reg->addr);
2147 break;
2148 default:
2149 break;
2150 }
2151
2152 return r;
2153}
2154
2155static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2156 struct kvm_one_reg *reg)
2157{
2158 int r = -EINVAL;
2159 __u64 val;
2160
2161 switch (reg->id) {
2162 case KVM_REG_S390_TODPR:
2163 r = get_user(vcpu->arch.sie_block->todpr,
2164 (u32 __user *)reg->addr);
2165 break;
2166 case KVM_REG_S390_EPOCHDIFF:
2167 r = get_user(vcpu->arch.sie_block->epoch,
2168 (u64 __user *)reg->addr);
2169 break;
2170 case KVM_REG_S390_CPU_TIMER:
2171 r = get_user(val, (u64 __user *)reg->addr);
2172 if (!r)
2173 kvm_s390_set_cpu_timer(vcpu, val);
2174 break;
2175 case KVM_REG_S390_CLOCK_COMP:
2176 r = get_user(vcpu->arch.sie_block->ckc,
2177 (u64 __user *)reg->addr);
2178 break;
2179 case KVM_REG_S390_PFTOKEN:
2180 r = get_user(vcpu->arch.pfault_token,
2181 (u64 __user *)reg->addr);
2182 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2183 kvm_clear_async_pf_completion_queue(vcpu);
2184 break;
2185 case KVM_REG_S390_PFCOMPARE:
2186 r = get_user(vcpu->arch.pfault_compare,
2187 (u64 __user *)reg->addr);
2188 break;
2189 case KVM_REG_S390_PFSELECT:
2190 r = get_user(vcpu->arch.pfault_select,
2191 (u64 __user *)reg->addr);
2192 break;
2193 case KVM_REG_S390_PP:
2194 r = get_user(vcpu->arch.sie_block->pp,
2195 (u64 __user *)reg->addr);
2196 break;
2197 case KVM_REG_S390_GBEA:
2198 r = get_user(vcpu->arch.sie_block->gbea,
2199 (u64 __user *)reg->addr);
2200 break;
2201 default:
2202 break;
2203 }
2204
2205 return r;
2206}
2207
/* ioctl wrapper around kvm_s390_vcpu_initial_reset(); always returns 0. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);

	return 0;
}
2213
2214int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2215{
2216 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2217 return 0;
2218}
2219
2220int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2221{
2222 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2223 return 0;
2224}
2225
2226int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2227 struct kvm_sregs *sregs)
2228{
2229 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2230 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2231 return 0;
2232}
2233
2234int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2235 struct kvm_sregs *sregs)
2236{
2237 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2238 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2239 return 0;
2240}
2241
2242int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2243{
2244 if (test_fp_ctl(fpu->fpc))
2245 return -EINVAL;
2246 vcpu->run->s.regs.fpc = fpu->fpc;
2247 if (MACHINE_HAS_VX)
2248 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2249 (freg_t *) fpu->fprs);
2250 else
2251 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2252 return 0;
2253}
2254
2255int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2256{
2257 /* make sure we have the latest values */
2258 save_fpu_regs();
2259 if (MACHINE_HAS_VX)
2260 convert_vx_to_fp((freg_t *) fpu->fprs,
2261 (__vector128 *) vcpu->run->s.regs.vrs);
2262 else
2263 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2264 fpu->fpc = vcpu->run->s.regs.fpc;
2265 return 0;
2266}
2267
2268static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2269{
2270 int rc = 0;
2271
2272 if (!is_vcpu_stopped(vcpu))
2273 rc = -EBUSY;
2274 else {
2275 vcpu->run->psw_mask = psw.mask;
2276 vcpu->run->psw_addr = psw.addr;
2277 }
2278 return rc;
2279}
2280
2281int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2282 struct kvm_translation *tr)
2283{
2284 return -EINVAL; /* not implemented yet */
2285}
2286
2287#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2288 KVM_GUESTDBG_USE_HW_BP | \
2289 KVM_GUESTDBG_ENABLE)
2290
2291int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2292 struct kvm_guest_debug *dbg)
2293{
2294 int rc = 0;
2295
2296 vcpu->guest_debug = 0;
2297 kvm_s390_clear_bp_data(vcpu);
2298
2299 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2300 return -EINVAL;
2301 if (!sclp.has_gpere)
2302 return -EINVAL;
2303
2304 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2305 vcpu->guest_debug = dbg->control;
2306 /* enforce guest PER */
2307 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2308
2309 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2310 rc = kvm_s390_import_bp_data(vcpu, dbg);
2311 } else {
2312 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2313 vcpu->arch.guestdbg.last_bp = 0;
2314 }
2315
2316 if (rc) {
2317 vcpu->guest_debug = 0;
2318 kvm_s390_clear_bp_data(vcpu);
2319 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2320 }
2321
2322 return rc;
2323}
2324
2325int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2326 struct kvm_mp_state *mp_state)
2327{
2328 /* CHECK_STOP and LOAD are not supported yet */
2329 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2330 KVM_MP_STATE_OPERATING;
2331}
2332
2333int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2334 struct kvm_mp_state *mp_state)
2335{
2336 int rc = 0;
2337
2338 /* user space knows about this interface - let it control the state */
2339 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2340
2341 switch (mp_state->mp_state) {
2342 case KVM_MP_STATE_STOPPED:
2343 kvm_s390_vcpu_stop(vcpu);
2344 break;
2345 case KVM_MP_STATE_OPERATING:
2346 kvm_s390_vcpu_start(vcpu);
2347 break;
2348 case KVM_MP_STATE_LOAD:
2349 case KVM_MP_STATE_CHECK_STOP:
2350 /* fall through - CHECK_STOP and LOAD are not supported yet */
2351 default:
2352 rc = -ENXIO;
2353 }
2354
2355 return rc;
2356}
2357
2358static bool ibs_enabled(struct kvm_vcpu *vcpu)
2359{
2360 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2361}
2362
2363static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2364{
2365retry:
2366 kvm_s390_vcpu_request_handled(vcpu);
2367 if (!vcpu->requests)
2368 return 0;
2369 /*
2370 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2371 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2372 * This ensures that the ipte instruction for this request has
2373 * already finished. We might race against a second unmapper that
2374 * wants to set the blocking bit. Lets just retry the request loop.
2375 */
2376 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2377 int rc;
2378 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2379 kvm_s390_get_prefix(vcpu),
2380 PAGE_SIZE * 2, PROT_WRITE);
2381 if (rc) {
2382 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2383 return rc;
2384 }
2385 goto retry;
2386 }
2387
2388 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2389 vcpu->arch.sie_block->ihcpu = 0xffff;
2390 goto retry;
2391 }
2392
2393 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2394 if (!ibs_enabled(vcpu)) {
2395 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2396 atomic_or(CPUSTAT_IBS,
2397 &vcpu->arch.sie_block->cpuflags);
2398 }
2399 goto retry;
2400 }
2401
2402 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2403 if (ibs_enabled(vcpu)) {
2404 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2405 atomic_andnot(CPUSTAT_IBS,
2406 &vcpu->arch.sie_block->cpuflags);
2407 }
2408 goto retry;
2409 }
2410
2411 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2412 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2413 goto retry;
2414 }
2415
2416 /* nothing to do, just clear the request */
2417 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2418
2419 return 0;
2420}
2421
2422void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2423{
2424 struct kvm_vcpu *vcpu;
2425 int i;
2426
2427 mutex_lock(&kvm->lock);
2428 preempt_disable();
2429 kvm->arch.epoch = tod - get_tod_clock();
2430 kvm_s390_vcpu_block_all(kvm);
2431 kvm_for_each_vcpu(i, vcpu, kvm)
2432 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2433 kvm_s390_vcpu_unblock_all(kvm);
2434 preempt_enable();
2435 mutex_unlock(&kvm->lock);
2436}
2437
2438/**
2439 * kvm_arch_fault_in_page - fault-in guest page if necessary
2440 * @vcpu: The corresponding virtual cpu
2441 * @gpa: Guest physical address
2442 * @writable: Whether the page should be writable or not
2443 *
2444 * Make sure that a guest page has been faulted-in on the host.
2445 *
2446 * Return: Zero on success, negative error code otherwise.
2447 */
2448long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2449{
2450 return gmap_fault(vcpu->arch.gmap, gpa,
2451 writable ? FAULT_FLAG_WRITE : 0);
2452}
2453
2454static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2455 unsigned long token)
2456{
2457 struct kvm_s390_interrupt inti;
2458 struct kvm_s390_irq irq;
2459
2460 if (start_token) {
2461 irq.u.ext.ext_params2 = token;
2462 irq.type = KVM_S390_INT_PFAULT_INIT;
2463 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2464 } else {
2465 inti.type = KVM_S390_INT_PFAULT_DONE;
2466 inti.parm64 = token;
2467 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2468 }
2469}
2470
2471void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2472 struct kvm_async_pf *work)
2473{
2474 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2475 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2476}
2477
2478void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2479 struct kvm_async_pf *work)
2480{
2481 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2482 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2483}
2484
/*
 * Async pf hook: intentionally empty, as s390 will always inject the
 * page directly.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
2490
2491bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2492{
2493 /*
2494 * s390 will always inject the page directly,
2495 * but we still want check_async_completion to cleanup
2496 */
2497 return true;
2498}
2499
2500static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2501{
2502 hva_t hva;
2503 struct kvm_arch_async_pf arch;
2504 int rc;
2505
2506 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2507 return 0;
2508 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2509 vcpu->arch.pfault_compare)
2510 return 0;
2511 if (psw_extint_disabled(vcpu))
2512 return 0;
2513 if (kvm_s390_vcpu_has_irq(vcpu, 0))
2514 return 0;
2515 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2516 return 0;
2517 if (!vcpu->arch.gmap->pfault_enabled)
2518 return 0;
2519
2520 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2521 hva += current->thread.gmap_addr & ~PAGE_MASK;
2522 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2523 return 0;
2524
2525 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2526 return rc;
2527}
2528
2529static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2530{
2531 int rc, cpuflags;
2532
2533 /*
2534 * On s390 notifications for arriving pages will be delivered directly
2535 * to the guest but the house keeping for completed pfaults is
2536 * handled outside the worker.
2537 */
2538 kvm_check_async_pf_completion(vcpu);
2539
2540 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2541 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2542
2543 if (need_resched())
2544 schedule();
2545
2546 if (test_cpu_flag(CIF_MCCK_PENDING))
2547 s390_handle_mcck();
2548
2549 if (!kvm_is_ucontrol(vcpu->kvm)) {
2550 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2551 if (rc)
2552 return rc;
2553 }
2554
2555 rc = kvm_s390_handle_requests(vcpu);
2556 if (rc)
2557 return rc;
2558
2559 if (guestdbg_enabled(vcpu)) {
2560 kvm_s390_backup_guest_per_regs(vcpu);
2561 kvm_s390_patch_guest_per_regs(vcpu);
2562 }
2563
2564 vcpu->arch.sie_block->icptcode = 0;
2565 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2566 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2567 trace_kvm_s390_sie_enter(vcpu, cpuflags);
2568
2569 return 0;
2570}
2571
2572static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2573{
2574 struct kvm_s390_pgm_info pgm_info = {
2575 .code = PGM_ADDRESSING,
2576 };
2577 u8 opcode, ilen;
2578 int rc;
2579
2580 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2581 trace_kvm_s390_sie_fault(vcpu);
2582
2583 /*
2584 * We want to inject an addressing exception, which is defined as a
2585 * suppressing or terminating exception. However, since we came here
2586 * by a DAT access exception, the PSW still points to the faulting
2587 * instruction since DAT exceptions are nullifying. So we've got
2588 * to look up the current opcode to get the length of the instruction
2589 * to be able to forward the PSW.
2590 */
2591 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
2592 ilen = insn_length(opcode);
2593 if (rc < 0) {
2594 return rc;
2595 } else if (rc) {
2596 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2597 * Forward by arbitrary ilc, injection will take care of
2598 * nullification if necessary.
2599 */
2600 pgm_info = vcpu->arch.pgm;
2601 ilen = 4;
2602 }
2603 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2604 kvm_s390_forward_psw(vcpu, ilen);
2605 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2606}
2607
2608static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2609{
2610 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2611 vcpu->arch.sie_block->icptcode);
2612 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2613
2614 if (guestdbg_enabled(vcpu))
2615 kvm_s390_restore_guest_per_regs(vcpu);
2616
2617 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2618 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2619
2620 if (vcpu->arch.sie_block->icptcode > 0) {
2621 int rc = kvm_handle_sie_intercept(vcpu);
2622
2623 if (rc != -EOPNOTSUPP)
2624 return rc;
2625 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2626 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2627 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2628 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2629 return -EREMOTE;
2630 } else if (exit_reason != -EFAULT) {
2631 vcpu->stat.exit_null++;
2632 return 0;
2633 } else if (kvm_is_ucontrol(vcpu->kvm)) {
2634 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2635 vcpu->run->s390_ucontrol.trans_exc_code =
2636 current->thread.gmap_addr;
2637 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2638 return -EREMOTE;
2639 } else if (current->thread.gmap_pfault) {
2640 trace_kvm_s390_major_guest_pfault(vcpu);
2641 current->thread.gmap_pfault = 0;
2642 if (kvm_arch_setup_async_pf(vcpu))
2643 return 0;
2644 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2645 }
2646 return vcpu_post_run_fault_in_sie(vcpu);
2647}
2648
2649static int __vcpu_run(struct kvm_vcpu *vcpu)
2650{
2651 int rc, exit_reason;
2652
2653 /*
2654 * We try to hold kvm->srcu during most of vcpu_run (except when run-
2655 * ning the guest), so that memslots (and other stuff) are protected
2656 */
2657 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2658
2659 do {
2660 rc = vcpu_pre_run(vcpu);
2661 if (rc)
2662 break;
2663
2664 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2665 /*
2666 * As PF_VCPU will be used in fault handler, between
2667 * guest_enter and guest_exit should be no uaccess.
2668 */
2669 local_irq_disable();
2670 guest_enter_irqoff();
2671 __disable_cpu_timer_accounting(vcpu);
2672 local_irq_enable();
2673 exit_reason = sie64a(vcpu->arch.sie_block,
2674 vcpu->run->s.regs.gprs);
2675 local_irq_disable();
2676 __enable_cpu_timer_accounting(vcpu);
2677 guest_exit_irqoff();
2678 local_irq_enable();
2679 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2680
2681 rc = vcpu_post_run(vcpu, exit_reason);
2682 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2683
2684 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2685 return rc;
2686}
2687
2688static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2689{
2690 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2691 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2692 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2693 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2694 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2695 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2696 /* some control register changes require a tlb flush */
2697 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2698 }
2699 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2700 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2701 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2702 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2703 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2704 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2705 }
2706 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2707 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2708 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2709 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2710 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2711 kvm_clear_async_pf_completion_queue(vcpu);
2712 }
2713 /*
2714 * If userspace sets the riccb (e.g. after migration) to a valid state,
2715 * we should enable RI here instead of doing the lazy enablement.
2716 */
2717 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2718 test_kvm_facility(vcpu->kvm, 64)) {
2719 struct runtime_instr_cb *riccb =
2720 (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2721
2722 if (riccb->valid)
2723 vcpu->arch.sie_block->ecb3 |= 0x01;
2724 }
2725 save_access_regs(vcpu->arch.host_acrs);
2726 restore_access_regs(vcpu->run->s.regs.acrs);
2727 /* save host (userspace) fprs/vrs */
2728 save_fpu_regs();
2729 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
2730 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
2731 if (MACHINE_HAS_VX)
2732 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
2733 else
2734 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
2735 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
2736 if (test_fp_ctl(current->thread.fpu.fpc))
2737 /* User space provided an invalid FPC, let's clear it */
2738 current->thread.fpu.fpc = 0;
2739
2740 kvm_run->kvm_dirty_regs = 0;
2741}
2742
2743static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2744{
2745 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2746 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2747 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2748 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2749 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2750 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2751 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2752 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2753 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2754 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2755 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2756 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2757 save_access_regs(vcpu->run->s.regs.acrs);
2758 restore_access_regs(vcpu->arch.host_acrs);
2759 /* Save guest register state */
2760 save_fpu_regs();
2761 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2762 /* Restore will be done lazily at return */
2763 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
2764 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
2765
2766}
2767
2768int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2769{
2770 int rc;
2771 sigset_t sigsaved;
2772
2773 if (guestdbg_exit_pending(vcpu)) {
2774 kvm_s390_prepare_debug_exit(vcpu);
2775 return 0;
2776 }
2777
2778 if (vcpu->sigset_active)
2779 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2780
2781 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2782 kvm_s390_vcpu_start(vcpu);
2783 } else if (is_vcpu_stopped(vcpu)) {
2784 pr_err_ratelimited("can't run stopped vcpu %d\n",
2785 vcpu->vcpu_id);
2786 return -EINVAL;
2787 }
2788
2789 sync_regs(vcpu, kvm_run);
2790 enable_cpu_timer_accounting(vcpu);
2791
2792 might_fault();
2793 rc = __vcpu_run(vcpu);
2794
2795 if (signal_pending(current) && !rc) {
2796 kvm_run->exit_reason = KVM_EXIT_INTR;
2797 rc = -EINTR;
2798 }
2799
2800 if (guestdbg_exit_pending(vcpu) && !rc) {
2801 kvm_s390_prepare_debug_exit(vcpu);
2802 rc = 0;
2803 }
2804
2805 if (rc == -EREMOTE) {
2806 /* userspace support is needed, kvm_run has been prepared */
2807 rc = 0;
2808 }
2809
2810 disable_cpu_timer_accounting(vcpu);
2811 store_regs(vcpu, kvm_run);
2812
2813 if (vcpu->sigset_active)
2814 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2815
2816 vcpu->stat.exit_userspace++;
2817 return rc;
2818}
2819
2820/*
2821 * store status at address
2822 * we use have two special cases:
2823 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2824 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2825 */
2826int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2827{
2828 unsigned char archmode = 1;
2829 freg_t fprs[NUM_FPRS];
2830 unsigned int px;
2831 u64 clkcomp, cputm;
2832 int rc;
2833
2834 px = kvm_s390_get_prefix(vcpu);
2835 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2836 if (write_guest_abs(vcpu, 163, &archmode, 1))
2837 return -EFAULT;
2838 gpa = 0;
2839 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2840 if (write_guest_real(vcpu, 163, &archmode, 1))
2841 return -EFAULT;
2842 gpa = px;
2843 } else
2844 gpa -= __LC_FPREGS_SAVE_AREA;
2845
2846 /* manually convert vector registers if necessary */
2847 if (MACHINE_HAS_VX) {
2848 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2849 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2850 fprs, 128);
2851 } else {
2852 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2853 vcpu->run->s.regs.fprs, 128);
2854 }
2855 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2856 vcpu->run->s.regs.gprs, 128);
2857 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2858 &vcpu->arch.sie_block->gpsw, 16);
2859 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2860 &px, 4);
2861 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2862 &vcpu->run->s.regs.fpc, 4);
2863 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2864 &vcpu->arch.sie_block->todpr, 4);
2865 cputm = kvm_s390_get_cpu_timer(vcpu);
2866 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2867 &cputm, 8);
2868 clkcomp = vcpu->arch.sie_block->ckc >> 8;
2869 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2870 &clkcomp, 8);
2871 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2872 &vcpu->run->s.regs.acrs, 64);
2873 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2874 &vcpu->arch.sie_block->gcr, 128);
2875 return rc ? -EFAULT : 0;
2876}
2877
2878int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2879{
2880 /*
2881 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2882 * switch in the run ioctl. Let's update our copies before we save
2883 * it into the save area
2884 */
2885 save_fpu_regs();
2886 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2887 save_access_regs(vcpu->run->s.regs.acrs);
2888
2889 return kvm_s390_store_status_unloaded(vcpu, addr);
2890}
2891
2892static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2893{
2894 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2895 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2896}
2897
2898static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2899{
2900 unsigned int i;
2901 struct kvm_vcpu *vcpu;
2902
2903 kvm_for_each_vcpu(i, vcpu, kvm) {
2904 __disable_ibs_on_vcpu(vcpu);
2905 }
2906}
2907
2908static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2909{
2910 if (!sclp.has_ibs)
2911 return;
2912 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2913 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2914}
2915
2916void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2917{
2918 int i, online_vcpus, started_vcpus = 0;
2919
2920 if (!is_vcpu_stopped(vcpu))
2921 return;
2922
2923 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2924 /* Only one cpu at a time may enter/leave the STOPPED state. */
2925 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2926 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2927
2928 for (i = 0; i < online_vcpus; i++) {
2929 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2930 started_vcpus++;
2931 }
2932
2933 if (started_vcpus == 0) {
2934 /* we're the only active VCPU -> speed it up */
2935 __enable_ibs_on_vcpu(vcpu);
2936 } else if (started_vcpus == 1) {
2937 /*
2938 * As we are starting a second VCPU, we have to disable
2939 * the IBS facility on all VCPUs to remove potentially
2940 * oustanding ENABLE requests.
2941 */
2942 __disable_ibs_on_all_vcpus(vcpu->kvm);
2943 }
2944
2945 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2946 /*
2947 * Another VCPU might have used IBS while we were offline.
2948 * Let's play safe and flush the VCPU at startup.
2949 */
2950 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2951 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2952 return;
2953}
2954
2955void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2956{
2957 int i, online_vcpus, started_vcpus = 0;
2958 struct kvm_vcpu *started_vcpu = NULL;
2959
2960 if (is_vcpu_stopped(vcpu))
2961 return;
2962
2963 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2964 /* Only one cpu at a time may enter/leave the STOPPED state. */
2965 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2966 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2967
2968 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2969 kvm_s390_clear_stop_irq(vcpu);
2970
2971 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2972 __disable_ibs_on_vcpu(vcpu);
2973
2974 for (i = 0; i < online_vcpus; i++) {
2975 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2976 started_vcpus++;
2977 started_vcpu = vcpu->kvm->vcpus[i];
2978 }
2979 }
2980
2981 if (started_vcpus == 1) {
2982 /*
2983 * As we only have one VCPU left, we want to enable the
2984 * IBS facility for that VCPU to speed it up.
2985 */
2986 __enable_ibs_on_vcpu(started_vcpu);
2987 }
2988
2989 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2990 return;
2991}
2992
2993static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2994 struct kvm_enable_cap *cap)
2995{
2996 int r;
2997
2998 if (cap->flags)
2999 return -EINVAL;
3000
3001 switch (cap->cap) {
3002 case KVM_CAP_S390_CSS_SUPPORT:
3003 if (!vcpu->kvm->arch.css_support) {
3004 vcpu->kvm->arch.css_support = 1;
3005 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3006 trace_kvm_s390_enable_css(vcpu->kvm);
3007 }
3008 r = 0;
3009 break;
3010 default:
3011 r = -EINVAL;
3012 break;
3013 }
3014 return r;
3015}
3016
3017static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3018 struct kvm_s390_mem_op *mop)
3019{
3020 void __user *uaddr = (void __user *)mop->buf;
3021 void *tmpbuf = NULL;
3022 int r, srcu_idx;
3023 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3024 | KVM_S390_MEMOP_F_CHECK_ONLY;
3025
3026 if (mop->flags & ~supported_flags)
3027 return -EINVAL;
3028
3029 if (mop->size > MEM_OP_MAX_SIZE)
3030 return -E2BIG;
3031
3032 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3033 tmpbuf = vmalloc(mop->size);
3034 if (!tmpbuf)
3035 return -ENOMEM;
3036 }
3037
3038 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3039
3040 switch (mop->op) {
3041 case KVM_S390_MEMOP_LOGICAL_READ:
3042 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3043 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3044 mop->size, GACC_FETCH);
3045 break;
3046 }
3047 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3048 if (r == 0) {
3049 if (copy_to_user(uaddr, tmpbuf, mop->size))
3050 r = -EFAULT;
3051 }
3052 break;
3053 case KVM_S390_MEMOP_LOGICAL_WRITE:
3054 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3055 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3056 mop->size, GACC_STORE);
3057 break;
3058 }
3059 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3060 r = -EFAULT;
3061 break;
3062 }
3063 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3064 break;
3065 default:
3066 r = -EINVAL;
3067 }
3068
3069 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3070
3071 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3072 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3073
3074 vfree(tmpbuf);
3075 return r;
3076}
3077
3078long kvm_arch_vcpu_ioctl(struct file *filp,
3079 unsigned int ioctl, unsigned long arg)
3080{
3081 struct kvm_vcpu *vcpu = filp->private_data;
3082 void __user *argp = (void __user *)arg;
3083 int idx;
3084 long r;
3085
3086 switch (ioctl) {
3087 case KVM_S390_IRQ: {
3088 struct kvm_s390_irq s390irq;
3089
3090 r = -EFAULT;
3091 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3092 break;
3093 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3094 break;
3095 }
3096 case KVM_S390_INTERRUPT: {
3097 struct kvm_s390_interrupt s390int;
3098 struct kvm_s390_irq s390irq;
3099
3100 r = -EFAULT;
3101 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3102 break;
3103 if (s390int_to_s390irq(&s390int, &s390irq))
3104 return -EINVAL;
3105 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3106 break;
3107 }
3108 case KVM_S390_STORE_STATUS:
3109 idx = srcu_read_lock(&vcpu->kvm->srcu);
3110 r = kvm_s390_vcpu_store_status(vcpu, arg);
3111 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3112 break;
3113 case KVM_S390_SET_INITIAL_PSW: {
3114 psw_t psw;
3115
3116 r = -EFAULT;
3117 if (copy_from_user(&psw, argp, sizeof(psw)))
3118 break;
3119 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3120 break;
3121 }
3122 case KVM_S390_INITIAL_RESET:
3123 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3124 break;
3125 case KVM_SET_ONE_REG:
3126 case KVM_GET_ONE_REG: {
3127 struct kvm_one_reg reg;
3128 r = -EFAULT;
3129 if (copy_from_user(&reg, argp, sizeof(reg)))
3130 break;
3131 if (ioctl == KVM_SET_ONE_REG)
3132 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3133 else
3134 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3135 break;
3136 }
3137#ifdef CONFIG_KVM_S390_UCONTROL
3138 case KVM_S390_UCAS_MAP: {
3139 struct kvm_s390_ucas_mapping ucasmap;
3140
3141 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3142 r = -EFAULT;
3143 break;
3144 }
3145
3146 if (!kvm_is_ucontrol(vcpu->kvm)) {
3147 r = -EINVAL;
3148 break;
3149 }
3150
3151 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3152 ucasmap.vcpu_addr, ucasmap.length);
3153 break;
3154 }
3155 case KVM_S390_UCAS_UNMAP: {
3156 struct kvm_s390_ucas_mapping ucasmap;
3157
3158 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3159 r = -EFAULT;
3160 break;
3161 }
3162
3163 if (!kvm_is_ucontrol(vcpu->kvm)) {
3164 r = -EINVAL;
3165 break;
3166 }
3167
3168 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3169 ucasmap.length);
3170 break;
3171 }
3172#endif
3173 case KVM_S390_VCPU_FAULT: {
3174 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3175 break;
3176 }
3177 case KVM_ENABLE_CAP:
3178 {
3179 struct kvm_enable_cap cap;
3180 r = -EFAULT;
3181 if (copy_from_user(&cap, argp, sizeof(cap)))
3182 break;
3183 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3184 break;
3185 }
3186 case KVM_S390_MEM_OP: {
3187 struct kvm_s390_mem_op mem_op;
3188
3189 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3190 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3191 else
3192 r = -EFAULT;
3193 break;
3194 }
3195 case KVM_S390_SET_IRQ_STATE: {
3196 struct kvm_s390_irq_state irq_state;
3197
3198 r = -EFAULT;
3199 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3200 break;
3201 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3202 irq_state.len == 0 ||
3203 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3204 r = -EINVAL;
3205 break;
3206 }
3207 r = kvm_s390_set_irq_state(vcpu,
3208 (void __user *) irq_state.buf,
3209 irq_state.len);
3210 break;
3211 }
3212 case KVM_S390_GET_IRQ_STATE: {
3213 struct kvm_s390_irq_state irq_state;
3214
3215 r = -EFAULT;
3216 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3217 break;
3218 if (irq_state.len == 0) {
3219 r = -EINVAL;
3220 break;
3221 }
3222 r = kvm_s390_get_irq_state(vcpu,
3223 (__u8 __user *) irq_state.buf,
3224 irq_state.len);
3225 break;
3226 }
3227 default:
3228 r = -ENOTTY;
3229 }
3230 return r;
3231}
3232
3233int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3234{
3235#ifdef CONFIG_KVM_S390_UCONTROL
3236 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3237 && (kvm_is_ucontrol(vcpu->kvm))) {
3238 vmf->page = virt_to_page(vcpu->arch.sie_block);
3239 get_page(vmf->page);
3240 return 0;
3241 }
3242#endif
3243 return VM_FAULT_SIGBUS;
3244}
3245
/*
 * Architecture hook for memslot creation.
 *
 * Nothing is set up here for @slot; the function ignores its arguments
 * and always reports success (0).
 */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
3251
3252/* Section: memory related */
3253int kvm_arch_prepare_memory_region(struct kvm *kvm,
3254 struct kvm_memory_slot *memslot,
3255 const struct kvm_userspace_memory_region *mem,
3256 enum kvm_mr_change change)
3257{
3258 /* A few sanity checks. We can have memory slots which have to be
3259 located/ended at a segment boundary (1MB). The memory in userland is
3260 ok to be fragmented into various different vmas. It is okay to mmap()
3261 and munmap() stuff in this slot after doing this call at any time */
3262
3263 if (mem->userspace_addr & 0xffffful)
3264 return -EINVAL;
3265
3266 if (mem->memory_size & 0xffffful)
3267 return -EINVAL;
3268
3269 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3270 return -EINVAL;
3271
3272 return 0;
3273}
3274
3275void kvm_arch_commit_memory_region(struct kvm *kvm,
3276 const struct kvm_userspace_memory_region *mem,
3277 const struct kvm_memory_slot *old,
3278 const struct kvm_memory_slot *new,
3279 enum kvm_mr_change change)
3280{
3281 int rc;
3282
3283 /* If the basics of the memslot do not change, we do not want
3284 * to update the gmap. Every update causes several unnecessary
3285 * segment translation exceptions. This is usually handled just
3286 * fine by the normal fault handler + gmap, but it will also
3287 * cause faults on the prefix page of running guest CPUs.
3288 */
3289 if (old->userspace_addr == mem->userspace_addr &&
3290 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3291 old->npages * PAGE_SIZE == mem->memory_size)
3292 return;
3293
3294 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3295 mem->guest_phys_addr, mem->memory_size);
3296 if (rc)
3297 pr_warn("failed to commit memory region\n");
3298 return;
3299}
3300
3301static inline unsigned long nonhyp_mask(int i)
3302{
3303 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3304
3305 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3306}
3307
3308void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3309{
3310 vcpu->valid_wakeup = false;
3311}
3312
3313static int __init kvm_s390_init(void)
3314{
3315 int i;
3316
3317 if (!sclp.has_sief2) {
3318 pr_info("SIE not available\n");
3319 return -ENODEV;
3320 }
3321
3322 for (i = 0; i < 16; i++)
3323 kvm_s390_fac_list_mask[i] |=
3324 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3325
3326 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3327}
3328
3329static void __exit kvm_s390_exit(void)
3330{
3331 kvm_exit();
3332}
3333
3334module_init(kvm_s390_init);
3335module_exit(kvm_s390_exit);
3336
3337/*
3338 * Enable autoloading of the kvm module.
3339 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3340 * since x86 takes a different approach.
3341 */
3342#include <linux/miscdevice.h>
3343MODULE_ALIAS_MISCDEV(KVM_MINOR);
3344MODULE_ALIAS("devname:kvm");