1 /*
2 * hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008, 2009
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
15 */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include "kvm-s390.h"
47 #include "gaccess.h"
48
49 #define KMSG_COMPONENT "kvm-s390"
50 #undef pr_fmt
51 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
52
53 #define CREATE_TRACE_POINTS
54 #include "trace.h"
55 #include "trace-s390.h"
56
57 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
58 #define LOCAL_IRQS 32
59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
60 (KVM_MAX_VCPUS + LOCAL_IRQS))
61
62 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
63
64 struct kvm_stats_debugfs_item debugfs_entries[] = {
65 { "userspace_handled", VCPU_STAT(exit_userspace) },
66 { "exit_null", VCPU_STAT(exit_null) },
67 { "exit_validity", VCPU_STAT(exit_validity) },
68 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
69 { "exit_external_request", VCPU_STAT(exit_external_request) },
70 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
71 { "exit_instruction", VCPU_STAT(exit_instruction) },
72 { "exit_pei", VCPU_STAT(exit_pei) },
73 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
74 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
75 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
76 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
77 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
78 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
79 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
80 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
82 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
83 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
84 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
85 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
86 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
87 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
88 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
89 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
90 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
91 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
92 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
93 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
94 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
95 { "instruction_spx", VCPU_STAT(instruction_spx) },
96 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
97 { "instruction_stap", VCPU_STAT(instruction_stap) },
98 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
99 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
100 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
101 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
102 { "instruction_essa", VCPU_STAT(instruction_essa) },
103 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
104 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
105 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
106 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
107 { "instruction_sie", VCPU_STAT(instruction_sie) },
108 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
109 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
110 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
111 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
112 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
113 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
114 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
115 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
116 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
117 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
118 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
119 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
120 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
121 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
122 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
123 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
124 { "diagnose_10", VCPU_STAT(diagnose_10) },
125 { "diagnose_44", VCPU_STAT(diagnose_44) },
126 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
127 { "diagnose_258", VCPU_STAT(diagnose_258) },
128 { "diagnose_308", VCPU_STAT(diagnose_308) },
129 { "diagnose_500", VCPU_STAT(diagnose_500) },
130 { NULL }
131 };
132
133 /* allow nested virtualization in KVM (if enabled by user space) */
134 static int nested;
135 module_param(nested, int, S_IRUGO);
136 MODULE_PARM_DESC(nested, "Nested virtualization support");
137
138 /* upper facilities limit for kvm */
139 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
140
141 unsigned long kvm_s390_fac_list_mask_size(void)
142 {
143 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
144 return ARRAY_SIZE(kvm_s390_fac_list_mask);
145 }
146
147 /* available cpu features supported by kvm */
148 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
149 /* available subfunctions indicated via query / "test bit" */
150 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
151
152 static struct gmap_notifier gmap_notifier;
153 static struct gmap_notifier vsie_gmap_notifier;
154 debug_info_t *kvm_s390_dbf;
155
156 /* Section: not file related */
157 int kvm_arch_hardware_enable(void)
158 {
159 /* every s390 is virtualization enabled ;-) */
160 return 0;
161 }
162
163 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
164 unsigned long end);
165
166 /*
167 * This callback is executed during stop_machine(). All CPUs are therefore
168 * temporarily stopped. In order not to change guest behavior, we have to
169 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
170 * so a CPU won't be stopped while calculating with the epoch.
171 */
172 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
173 void *v)
174 {
175 struct kvm *kvm;
176 struct kvm_vcpu *vcpu;
177 int i;
178 unsigned long long *delta = v;
179
180 list_for_each_entry(kvm, &vm_list, vm_list) {
181 kvm->arch.epoch -= *delta;
182 kvm_for_each_vcpu(i, vcpu, kvm) {
183 vcpu->arch.sie_block->epoch -= *delta;
184 if (vcpu->arch.cputm_enabled)
185 vcpu->arch.cputm_start += *delta;
186 if (vcpu->arch.vsie_block)
187 vcpu->arch.vsie_block->epoch -= *delta;
188 }
189 }
190 return NOTIFY_OK;
191 }
192
193 static struct notifier_block kvm_clock_notifier = {
194 .notifier_call = kvm_clock_sync,
195 };
196
197 int kvm_arch_hardware_setup(void)
198 {
199 gmap_notifier.notifier_call = kvm_gmap_notifier;
200 gmap_register_pte_notifier(&gmap_notifier);
201 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
202 gmap_register_pte_notifier(&vsie_gmap_notifier);
203 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
204 &kvm_clock_notifier);
205 return 0;
206 }
207
208 void kvm_arch_hardware_unsetup(void)
209 {
210 gmap_unregister_pte_notifier(&gmap_notifier);
211 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
212 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
213 &kvm_clock_notifier);
214 }
215
216 static void allow_cpu_feat(unsigned long nr)
217 {
218 set_bit_inv(nr, kvm_s390_available_cpu_feat);
219 }
220
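/*
 * Probe whether a single PLO (PERFORM LOCKED OPERATION) function code is
 * available on the host. Setting bit 0x100 in the function code selects the
 * "test bit" variant, which only reports availability via the condition code.
 */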
221 static inline int plo_test_bit(unsigned char nr)
222 {
223 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
224 int cc;
225
226 asm volatile(
227 /* Parameter registers are ignored for "test bit" */
228 " plo 0,0,0,0(0)\n"
229 " ipm %0\n"
230 " srl %0,28\n"
231 : "=d" (cc)
232 : "d" (r0)
233 : "cc");
234 return cc == 0;
235 }
236
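/*
 * Detect which PLO and CPACF subfunctions the host provides and which SIE
 * features can be offered to guests. The SIE/vSIE related CPU features are
 * only advertised when the "nested" module parameter is set.
 */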
237 static void kvm_s390_cpu_feat_init(void)
238 {
239 int i;
240
241 for (i = 0; i < 256; ++i) {
242 if (plo_test_bit(i))
243 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
244 }
245
246 if (test_facility(28)) /* TOD-clock steering */
247 ptff(kvm_s390_available_subfunc.ptff,
248 sizeof(kvm_s390_available_subfunc.ptff),
249 PTFF_QAF);
250
251 if (test_facility(17)) { /* MSA */
252 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
253 kvm_s390_available_subfunc.kmac);
254 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
255 kvm_s390_available_subfunc.kmc);
256 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
257 kvm_s390_available_subfunc.km);
258 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
259 kvm_s390_available_subfunc.kimd);
260 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
261 kvm_s390_available_subfunc.klmd);
262 }
263 if (test_facility(76)) /* MSA3 */
264 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
265 kvm_s390_available_subfunc.pckmo);
266 if (test_facility(77)) { /* MSA4 */
267 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
268 kvm_s390_available_subfunc.kmctr);
269 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
270 kvm_s390_available_subfunc.kmf);
271 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
272 kvm_s390_available_subfunc.kmo);
273 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
274 kvm_s390_available_subfunc.pcc);
275 }
276 if (test_facility(57)) /* MSA5 */
277 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
278 kvm_s390_available_subfunc.ppno);
279
280 if (test_facility(146)) /* MSA8 */
281 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
282 kvm_s390_available_subfunc.kma);
283
284 if (MACHINE_HAS_ESOP)
285 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
286 /*
287 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
288 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
289 */
290 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
291 !test_facility(3) || !nested)
292 return;
293 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
294 if (sclp.has_64bscao)
295 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
296 if (sclp.has_siif)
297 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
298 if (sclp.has_gpere)
299 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
300 if (sclp.has_gsls)
301 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
302 if (sclp.has_ib)
303 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
304 if (sclp.has_cei)
305 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
306 if (sclp.has_ibs)
307 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
308 if (sclp.has_kss)
309 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
310 /*
311 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
312 * all skey handling functions read/set the skey from the PGSTE
313 * instead of the real storage key.
314 *
315 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
316 * pages to be detected as preserved even though they are resident.
317 *
318 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
319 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
320 *
321 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
322 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
323 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
324 *
325 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
326 * cannot easily shadow the SCA because of the ipte lock.
327 */
328 }
329
330 int kvm_arch_init(void *opaque)
331 {
332 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
333 if (!kvm_s390_dbf)
334 return -ENOMEM;
335
336 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
337 debug_unregister(kvm_s390_dbf);
338 return -ENOMEM;
339 }
340
341 kvm_s390_cpu_feat_init();
342
343 /* Register floating interrupt controller interface. */
344 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
345 }
346
347 void kvm_arch_exit(void)
348 {
349 debug_unregister(kvm_s390_dbf);
350 }
351
352 /* Section: device related */
353 long kvm_arch_dev_ioctl(struct file *filp,
354 unsigned int ioctl, unsigned long arg)
355 {
356 if (ioctl == KVM_S390_ENABLE_SIE)
357 return s390_enable_sie();
358 return -EINVAL;
359 }
360
361 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
362 {
363 int r;
364
365 switch (ext) {
366 case KVM_CAP_S390_PSW:
367 case KVM_CAP_S390_GMAP:
368 case KVM_CAP_SYNC_MMU:
369 #ifdef CONFIG_KVM_S390_UCONTROL
370 case KVM_CAP_S390_UCONTROL:
371 #endif
372 case KVM_CAP_ASYNC_PF:
373 case KVM_CAP_SYNC_REGS:
374 case KVM_CAP_ONE_REG:
375 case KVM_CAP_ENABLE_CAP:
376 case KVM_CAP_S390_CSS_SUPPORT:
377 case KVM_CAP_IOEVENTFD:
378 case KVM_CAP_DEVICE_CTRL:
379 case KVM_CAP_ENABLE_CAP_VM:
380 case KVM_CAP_S390_IRQCHIP:
381 case KVM_CAP_VM_ATTRIBUTES:
382 case KVM_CAP_MP_STATE:
383 case KVM_CAP_IMMEDIATE_EXIT:
384 case KVM_CAP_S390_INJECT_IRQ:
385 case KVM_CAP_S390_USER_SIGP:
386 case KVM_CAP_S390_USER_STSI:
387 case KVM_CAP_S390_SKEYS:
388 case KVM_CAP_S390_IRQ_STATE:
389 case KVM_CAP_S390_USER_INSTR0:
390 case KVM_CAP_S390_CMMA_MIGRATION:
391 case KVM_CAP_S390_AIS:
392 r = 1;
393 break;
394 case KVM_CAP_S390_MEM_OP:
395 r = MEM_OP_MAX_SIZE;
396 break;
397 case KVM_CAP_NR_VCPUS:
398 case KVM_CAP_MAX_VCPUS:
399 r = KVM_S390_BSCA_CPU_SLOTS;
400 if (!kvm_s390_use_sca_entries())
401 r = KVM_MAX_VCPUS;
402 else if (sclp.has_esca && sclp.has_64bscao)
403 r = KVM_S390_ESCA_CPU_SLOTS;
404 break;
405 case KVM_CAP_NR_MEMSLOTS:
406 r = KVM_USER_MEM_SLOTS;
407 break;
408 case KVM_CAP_S390_COW:
409 r = MACHINE_HAS_ESOP;
410 break;
411 case KVM_CAP_S390_VECTOR_REGISTERS:
412 r = MACHINE_HAS_VX;
413 break;
414 case KVM_CAP_S390_RI:
415 r = test_facility(64);
416 break;
417 case KVM_CAP_S390_GS:
418 r = test_facility(133);
419 break;
420 default:
421 r = 0;
422 }
423 return r;
424 }
425
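/*
 * Transfer the dirty state of all pages in a memory slot from the host page
 * tables (guest mapping) into the KVM dirty bitmap for that slot.
 */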
426 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
427 struct kvm_memory_slot *memslot)
428 {
429 gfn_t cur_gfn, last_gfn;
430 unsigned long address;
431 struct gmap *gmap = kvm->arch.gmap;
432
433 /* Loop over all guest pages */
434 last_gfn = memslot->base_gfn + memslot->npages;
435 for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
436 address = gfn_to_hva_memslot(memslot, cur_gfn);
437
438 if (test_and_clear_guest_dirty(gmap->mm, address))
439 mark_page_dirty(kvm, cur_gfn);
440 if (fatal_signal_pending(current))
441 return;
442 cond_resched();
443 }
444 }
445
446 /* Section: vm related */
447 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
448
449 /*
450 * Get (and clear) the dirty memory log for a memory slot.
451 */
452 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
453 struct kvm_dirty_log *log)
454 {
455 int r;
456 unsigned long n;
457 struct kvm_memslots *slots;
458 struct kvm_memory_slot *memslot;
459 int is_dirty = 0;
460
461 if (kvm_is_ucontrol(kvm))
462 return -EINVAL;
463
464 mutex_lock(&kvm->slots_lock);
465
466 r = -EINVAL;
467 if (log->slot >= KVM_USER_MEM_SLOTS)
468 goto out;
469
470 slots = kvm_memslots(kvm);
471 memslot = id_to_memslot(slots, log->slot);
472 r = -ENOENT;
473 if (!memslot->dirty_bitmap)
474 goto out;
475
476 kvm_s390_sync_dirty_log(kvm, memslot);
477 r = kvm_get_dirty_log(kvm, log, &is_dirty);
478 if (r)
479 goto out;
480
481 /* Clear the dirty log */
482 if (is_dirty) {
483 n = kvm_dirty_bitmap_bytes(memslot);
484 memset(memslot->dirty_bitmap, 0, n);
485 }
486 r = 0;
487 out:
488 mutex_unlock(&kvm->slots_lock);
489 return r;
490 }
491
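/*
 * Ask every VCPU to intercept the operation exception, so that instruction
 * 0x0000 can be forwarded to user space (KVM_CAP_S390_USER_INSTR0).
 */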
492 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
493 {
494 unsigned int i;
495 struct kvm_vcpu *vcpu;
496
497 kvm_for_each_vcpu(i, vcpu, kvm) {
498 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
499 }
500 }
501
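/*
 * Enable a VM-wide capability (KVM_ENABLE_CAP on the VM fd). Capabilities
 * that change the guest CPU model, such as vector registers, runtime
 * instrumentation or guarded storage, can only be enabled before the first
 * VCPU has been created.
 */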
502 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
503 {
504 int r;
505
506 if (cap->flags)
507 return -EINVAL;
508
509 switch (cap->cap) {
510 case KVM_CAP_S390_IRQCHIP:
511 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
512 kvm->arch.use_irqchip = 1;
513 r = 0;
514 break;
515 case KVM_CAP_S390_USER_SIGP:
516 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
517 kvm->arch.user_sigp = 1;
518 r = 0;
519 break;
520 case KVM_CAP_S390_VECTOR_REGISTERS:
521 mutex_lock(&kvm->lock);
522 if (kvm->created_vcpus) {
523 r = -EBUSY;
524 } else if (MACHINE_HAS_VX) {
525 set_kvm_facility(kvm->arch.model.fac_mask, 129);
526 set_kvm_facility(kvm->arch.model.fac_list, 129);
527 if (test_facility(134)) {
528 set_kvm_facility(kvm->arch.model.fac_mask, 134);
529 set_kvm_facility(kvm->arch.model.fac_list, 134);
530 }
531 if (test_facility(135)) {
532 set_kvm_facility(kvm->arch.model.fac_mask, 135);
533 set_kvm_facility(kvm->arch.model.fac_list, 135);
534 }
535 r = 0;
536 } else
537 r = -EINVAL;
538 mutex_unlock(&kvm->lock);
539 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
540 r ? "(not available)" : "(success)");
541 break;
542 case KVM_CAP_S390_RI:
543 r = -EINVAL;
544 mutex_lock(&kvm->lock);
545 if (kvm->created_vcpus) {
546 r = -EBUSY;
547 } else if (test_facility(64)) {
548 set_kvm_facility(kvm->arch.model.fac_mask, 64);
549 set_kvm_facility(kvm->arch.model.fac_list, 64);
550 r = 0;
551 }
552 mutex_unlock(&kvm->lock);
553 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
554 r ? "(not available)" : "(success)");
555 break;
556 case KVM_CAP_S390_AIS:
557 mutex_lock(&kvm->lock);
558 if (kvm->created_vcpus) {
559 r = -EBUSY;
560 } else {
561 set_kvm_facility(kvm->arch.model.fac_mask, 72);
562 set_kvm_facility(kvm->arch.model.fac_list, 72);
563 kvm->arch.float_int.ais_enabled = 1;
564 r = 0;
565 }
566 mutex_unlock(&kvm->lock);
567 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
568 r ? "(not available)" : "(success)");
569 break;
570 case KVM_CAP_S390_GS:
571 r = -EINVAL;
572 mutex_lock(&kvm->lock);
573 if (atomic_read(&kvm->online_vcpus)) {
574 r = -EBUSY;
575 } else if (test_facility(133)) {
576 set_kvm_facility(kvm->arch.model.fac_mask, 133);
577 set_kvm_facility(kvm->arch.model.fac_list, 133);
578 r = 0;
579 }
580 mutex_unlock(&kvm->lock);
581 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
582 r ? "(not available)" : "(success)");
583 break;
584 case KVM_CAP_S390_USER_STSI:
585 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
586 kvm->arch.user_stsi = 1;
587 r = 0;
588 break;
589 case KVM_CAP_S390_USER_INSTR0:
590 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
591 kvm->arch.user_instr0 = 1;
592 icpt_operexc_on_all_vcpus(kvm);
593 r = 0;
594 break;
595 default:
596 r = -EINVAL;
597 break;
598 }
599 return r;
600 }
601
602 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
603 {
604 int ret;
605
606 switch (attr->attr) {
607 case KVM_S390_VM_MEM_LIMIT_SIZE:
608 ret = 0;
609 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
610 kvm->arch.mem_limit);
611 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
612 ret = -EFAULT;
613 break;
614 default:
615 ret = -ENXIO;
616 break;
617 }
618 return ret;
619 }
620
621 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
622 {
623 int ret;
624 unsigned int idx;
625 switch (attr->attr) {
626 case KVM_S390_VM_MEM_ENABLE_CMMA:
627 ret = -ENXIO;
628 if (!sclp.has_cmma)
629 break;
630
631 ret = -EBUSY;
632 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
633 mutex_lock(&kvm->lock);
634 if (!kvm->created_vcpus) {
635 kvm->arch.use_cmma = 1;
636 ret = 0;
637 }
638 mutex_unlock(&kvm->lock);
639 break;
640 case KVM_S390_VM_MEM_CLR_CMMA:
641 ret = -ENXIO;
642 if (!sclp.has_cmma)
643 break;
644 ret = -EINVAL;
645 if (!kvm->arch.use_cmma)
646 break;
647
648 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
649 mutex_lock(&kvm->lock);
650 idx = srcu_read_lock(&kvm->srcu);
651 s390_reset_cmma(kvm->arch.gmap->mm);
652 srcu_read_unlock(&kvm->srcu, idx);
653 mutex_unlock(&kvm->lock);
654 ret = 0;
655 break;
656 case KVM_S390_VM_MEM_LIMIT_SIZE: {
657 unsigned long new_limit;
658
659 if (kvm_is_ucontrol(kvm))
660 return -EINVAL;
661
662 if (get_user(new_limit, (u64 __user *)attr->addr))
663 return -EFAULT;
664
665 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
666 new_limit > kvm->arch.mem_limit)
667 return -E2BIG;
668
669 if (!new_limit)
670 return -EINVAL;
671
672 /* gmap_create takes last usable address */
673 if (new_limit != KVM_S390_NO_MEM_LIMIT)
674 new_limit -= 1;
675
676 ret = -EBUSY;
677 mutex_lock(&kvm->lock);
678 if (!kvm->created_vcpus) {
679 /* gmap_create will round the limit up */
680 struct gmap *new = gmap_create(current->mm, new_limit);
681
682 if (!new) {
683 ret = -ENOMEM;
684 } else {
685 gmap_remove(kvm->arch.gmap);
686 new->private = kvm;
687 kvm->arch.gmap = new;
688 ret = 0;
689 }
690 }
691 mutex_unlock(&kvm->lock);
692 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
693 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
694 (void *) kvm->arch.gmap->asce);
695 break;
696 }
697 default:
698 ret = -ENXIO;
699 break;
700 }
701 return ret;
702 }
703
704 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
705
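/*
 * Enable or disable AES/DEA protected-key (key wrapping) support for the VM.
 * Enabling generates fresh wrapping key masks; disabling clears them. All
 * VCPUs are then kicked out of SIE so the new crypto setup takes effect.
 */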
706 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
707 {
708 struct kvm_vcpu *vcpu;
709 int i;
710
711 if (!test_kvm_facility(kvm, 76))
712 return -EINVAL;
713
714 mutex_lock(&kvm->lock);
715 switch (attr->attr) {
716 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
717 get_random_bytes(
718 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
719 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
720 kvm->arch.crypto.aes_kw = 1;
721 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
722 break;
723 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
724 get_random_bytes(
725 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
726 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
727 kvm->arch.crypto.dea_kw = 1;
728 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
729 break;
730 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
731 kvm->arch.crypto.aes_kw = 0;
732 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
733 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
734 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
735 break;
736 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
737 kvm->arch.crypto.dea_kw = 0;
738 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
739 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
740 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
741 break;
742 default:
743 mutex_unlock(&kvm->lock);
744 return -ENXIO;
745 }
746
747 kvm_for_each_vcpu(i, vcpu, kvm) {
748 kvm_s390_vcpu_crypto_setup(vcpu);
749 exit_sie(vcpu);
750 }
751 mutex_unlock(&kvm->lock);
752 return 0;
753 }
754
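/*
 * Post a synchronous request to every VCPU of the VM, kicking each one out
 * of SIE so the request is processed.
 */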
755 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
756 {
757 int cx;
758 struct kvm_vcpu *vcpu;
759
760 kvm_for_each_vcpu(cx, vcpu, kvm)
761 kvm_s390_sync_request(req, vcpu);
762 }
763
764 /*
765 * Must be called with kvm->srcu held to avoid races on memslots, and with
766 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
767 */
768 static int kvm_s390_vm_start_migration(struct kvm *kvm)
769 {
770 struct kvm_s390_migration_state *mgs;
771 struct kvm_memory_slot *ms;
772 /* should be the only one */
773 struct kvm_memslots *slots;
774 unsigned long ram_pages;
775 int slotnr;
776
777 /* migration mode already enabled */
778 if (kvm->arch.migration_state)
779 return 0;
780
781 slots = kvm_memslots(kvm);
782 if (!slots || !slots->used_slots)
783 return -EINVAL;
784
785 mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
786 if (!mgs)
787 return -ENOMEM;
788 kvm->arch.migration_state = mgs;
789
790 if (kvm->arch.use_cmma) {
791 /*
792 * Get the last slot. They should be sorted by base_gfn, so the
793 * last slot is also the one at the end of the address space.
794 * We have verified above that at least one slot is present.
795 */
796 ms = slots->memslots + slots->used_slots - 1;
797 /* round up so we only use full longs */
798 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
799 /* allocate enough bytes to store all the bits */
800 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
801 if (!mgs->pgste_bitmap) {
802 kfree(mgs);
803 kvm->arch.migration_state = NULL;
804 return -ENOMEM;
805 }
806
807 mgs->bitmap_size = ram_pages;
808 atomic64_set(&mgs->dirty_pages, ram_pages);
809 /* mark all the pages in active slots as dirty */
810 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
811 ms = slots->memslots + slotnr;
812 bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
813 }
814
815 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
816 }
817 return 0;
818 }
819
820 /*
821 * Must be called with kvm->lock to avoid races with ourselves and
822 * kvm_s390_vm_start_migration.
823 */
824 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
825 {
826 struct kvm_s390_migration_state *mgs;
827
828 /* migration mode already disabled */
829 if (!kvm->arch.migration_state)
830 return 0;
831 mgs = kvm->arch.migration_state;
832 kvm->arch.migration_state = NULL;
833
834 if (kvm->arch.use_cmma) {
835 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
836 vfree(mgs->pgste_bitmap);
837 }
838 kfree(mgs);
839 return 0;
840 }
841
842 static int kvm_s390_vm_set_migration(struct kvm *kvm,
843 struct kvm_device_attr *attr)
844 {
845 int idx, res = -ENXIO;
846
847 mutex_lock(&kvm->lock);
848 switch (attr->attr) {
849 case KVM_S390_VM_MIGRATION_START:
850 idx = srcu_read_lock(&kvm->srcu);
851 res = kvm_s390_vm_start_migration(kvm);
852 srcu_read_unlock(&kvm->srcu, idx);
853 break;
854 case KVM_S390_VM_MIGRATION_STOP:
855 res = kvm_s390_vm_stop_migration(kvm);
856 break;
857 default:
858 break;
859 }
860 mutex_unlock(&kvm->lock);
861
862 return res;
863 }
864
865 static int kvm_s390_vm_get_migration(struct kvm *kvm,
866 struct kvm_device_attr *attr)
867 {
868 u64 mig = (kvm->arch.migration_state != NULL);
869
870 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
871 return -ENXIO;
872
873 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
874 return -EFAULT;
875 return 0;
876 }
877
878 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
879 {
880 u8 gtod_high;
881
882 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
883 sizeof(gtod_high)))
884 return -EFAULT;
885
886 if (gtod_high != 0)
887 return -EINVAL;
888 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
889
890 return 0;
891 }
892
893 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
894 {
895 u64 gtod;
896
897 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
898 return -EFAULT;
899
900 kvm_s390_set_tod_clock(kvm, gtod);
901 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
902 return 0;
903 }
904
905 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
906 {
907 int ret;
908
909 if (attr->flags)
910 return -EINVAL;
911
912 switch (attr->attr) {
913 case KVM_S390_VM_TOD_HIGH:
914 ret = kvm_s390_set_tod_high(kvm, attr);
915 break;
916 case KVM_S390_VM_TOD_LOW:
917 ret = kvm_s390_set_tod_low(kvm, attr);
918 break;
919 default:
920 ret = -ENXIO;
921 break;
922 }
923 return ret;
924 }
925
926 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
927 {
928 u8 gtod_high = 0;
929
930 if (copy_to_user((void __user *)attr->addr, &gtod_high,
931 sizeof(gtod_high)))
932 return -EFAULT;
933 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
934
935 return 0;
936 }
937
938 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
939 {
940 u64 gtod;
941
942 gtod = kvm_s390_get_tod_clock_fast(kvm);
943 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
944 return -EFAULT;
945 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
946
947 return 0;
948 }
949
950 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
951 {
952 int ret;
953
954 if (attr->flags)
955 return -EINVAL;
956
957 switch (attr->attr) {
958 case KVM_S390_VM_TOD_HIGH:
959 ret = kvm_s390_get_tod_high(kvm, attr);
960 break;
961 case KVM_S390_VM_TOD_LOW:
962 ret = kvm_s390_get_tod_low(kvm, attr);
963 break;
964 default:
965 ret = -ENXIO;
966 break;
967 }
968 return ret;
969 }
970
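/*
 * Set the guest CPU model: CPU id, IBC value (clamped to the range the host
 * supports) and facility list, as provided by user space.
 */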
971 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
972 {
973 struct kvm_s390_vm_cpu_processor *proc;
974 u16 lowest_ibc, unblocked_ibc;
975 int ret = 0;
976
977 mutex_lock(&kvm->lock);
978 if (kvm->created_vcpus) {
979 ret = -EBUSY;
980 goto out;
981 }
982 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
983 if (!proc) {
984 ret = -ENOMEM;
985 goto out;
986 }
987 if (!copy_from_user(proc, (void __user *)attr->addr,
988 sizeof(*proc))) {
989 kvm->arch.model.cpuid = proc->cpuid;
990 lowest_ibc = sclp.ibc >> 16 & 0xfff;
991 unblocked_ibc = sclp.ibc & 0xfff;
992 if (lowest_ibc && proc->ibc) {
993 if (proc->ibc > unblocked_ibc)
994 kvm->arch.model.ibc = unblocked_ibc;
995 else if (proc->ibc < lowest_ibc)
996 kvm->arch.model.ibc = lowest_ibc;
997 else
998 kvm->arch.model.ibc = proc->ibc;
999 }
1000 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1001 S390_ARCH_FAC_LIST_SIZE_BYTE);
1002 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1003 kvm->arch.model.ibc,
1004 kvm->arch.model.cpuid);
1005 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1006 kvm->arch.model.fac_list[0],
1007 kvm->arch.model.fac_list[1],
1008 kvm->arch.model.fac_list[2]);
1009 } else
1010 ret = -EFAULT;
1011 kfree(proc);
1012 out:
1013 mutex_unlock(&kvm->lock);
1014 return ret;
1015 }
1016
1017 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1018 struct kvm_device_attr *attr)
1019 {
1020 struct kvm_s390_vm_cpu_feat data;
1021 int ret = -EBUSY;
1022
1023 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1024 return -EFAULT;
1025 if (!bitmap_subset((unsigned long *) data.feat,
1026 kvm_s390_available_cpu_feat,
1027 KVM_S390_VM_CPU_FEAT_NR_BITS))
1028 return -EINVAL;
1029
1030 mutex_lock(&kvm->lock);
1031 if (!atomic_read(&kvm->online_vcpus)) {
1032 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1033 KVM_S390_VM_CPU_FEAT_NR_BITS);
1034 ret = 0;
1035 }
1036 mutex_unlock(&kvm->lock);
1037 return ret;
1038 }
1039
1040 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1041 struct kvm_device_attr *attr)
1042 {
1043 /*
1044 * Once supported by kernel + hw, we have to store the subfunctions
1045 * in kvm->arch and remember that user space configured them.
1046 */
1047 return -ENXIO;
1048 }
1049
1050 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1051 {
1052 int ret = -ENXIO;
1053
1054 switch (attr->attr) {
1055 case KVM_S390_VM_CPU_PROCESSOR:
1056 ret = kvm_s390_set_processor(kvm, attr);
1057 break;
1058 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1059 ret = kvm_s390_set_processor_feat(kvm, attr);
1060 break;
1061 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1062 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1063 break;
1064 }
1065 return ret;
1066 }
1067
1068 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1069 {
1070 struct kvm_s390_vm_cpu_processor *proc;
1071 int ret = 0;
1072
1073 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1074 if (!proc) {
1075 ret = -ENOMEM;
1076 goto out;
1077 }
1078 proc->cpuid = kvm->arch.model.cpuid;
1079 proc->ibc = kvm->arch.model.ibc;
1080 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1081 S390_ARCH_FAC_LIST_SIZE_BYTE);
1082 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1083 kvm->arch.model.ibc,
1084 kvm->arch.model.cpuid);
1085 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1086 kvm->arch.model.fac_list[0],
1087 kvm->arch.model.fac_list[1],
1088 kvm->arch.model.fac_list[2]);
1089 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1090 ret = -EFAULT;
1091 kfree(proc);
1092 out:
1093 return ret;
1094 }
1095
1096 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1097 {
1098 struct kvm_s390_vm_cpu_machine *mach;
1099 int ret = 0;
1100
1101 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1102 if (!mach) {
1103 ret = -ENOMEM;
1104 goto out;
1105 }
1106 get_cpu_id((struct cpuid *) &mach->cpuid);
1107 mach->ibc = sclp.ibc;
1108 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1109 S390_ARCH_FAC_LIST_SIZE_BYTE);
1110 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1111 sizeof(S390_lowcore.stfle_fac_list));
1112 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1113 kvm->arch.model.ibc,
1114 kvm->arch.model.cpuid);
1115 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1116 mach->fac_mask[0],
1117 mach->fac_mask[1],
1118 mach->fac_mask[2]);
1119 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1120 mach->fac_list[0],
1121 mach->fac_list[1],
1122 mach->fac_list[2]);
1123 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1124 ret = -EFAULT;
1125 kfree(mach);
1126 out:
1127 return ret;
1128 }
1129
1130 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1131 struct kvm_device_attr *attr)
1132 {
1133 struct kvm_s390_vm_cpu_feat data;
1134
1135 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1136 KVM_S390_VM_CPU_FEAT_NR_BITS);
1137 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1138 return -EFAULT;
1139 return 0;
1140 }
1141
1142 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1143 struct kvm_device_attr *attr)
1144 {
1145 struct kvm_s390_vm_cpu_feat data;
1146
1147 bitmap_copy((unsigned long *) data.feat,
1148 kvm_s390_available_cpu_feat,
1149 KVM_S390_VM_CPU_FEAT_NR_BITS);
1150 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1151 return -EFAULT;
1152 return 0;
1153 }
1154
1155 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1156 struct kvm_device_attr *attr)
1157 {
1158 /*
1159 * Once we can actually configure subfunctions (kernel + hw support),
1160 * we have to check if they were already set by user space, if so copy
1161 * them from kvm->arch.
1162 */
1163 return -ENXIO;
1164 }
1165
1166 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1167 struct kvm_device_attr *attr)
1168 {
1169 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1170 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1171 return -EFAULT;
1172 return 0;
1173 }
1174 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1175 {
1176 int ret = -ENXIO;
1177
1178 switch (attr->attr) {
1179 case KVM_S390_VM_CPU_PROCESSOR:
1180 ret = kvm_s390_get_processor(kvm, attr);
1181 break;
1182 case KVM_S390_VM_CPU_MACHINE:
1183 ret = kvm_s390_get_machine(kvm, attr);
1184 break;
1185 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1186 ret = kvm_s390_get_processor_feat(kvm, attr);
1187 break;
1188 case KVM_S390_VM_CPU_MACHINE_FEAT:
1189 ret = kvm_s390_get_machine_feat(kvm, attr);
1190 break;
1191 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1192 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1193 break;
1194 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1195 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1196 break;
1197 }
1198 return ret;
1199 }
1200
1201 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1202 {
1203 int ret;
1204
1205 switch (attr->group) {
1206 case KVM_S390_VM_MEM_CTRL:
1207 ret = kvm_s390_set_mem_control(kvm, attr);
1208 break;
1209 case KVM_S390_VM_TOD:
1210 ret = kvm_s390_set_tod(kvm, attr);
1211 break;
1212 case KVM_S390_VM_CPU_MODEL:
1213 ret = kvm_s390_set_cpu_model(kvm, attr);
1214 break;
1215 case KVM_S390_VM_CRYPTO:
1216 ret = kvm_s390_vm_set_crypto(kvm, attr);
1217 break;
1218 case KVM_S390_VM_MIGRATION:
1219 ret = kvm_s390_vm_set_migration(kvm, attr);
1220 break;
1221 default:
1222 ret = -ENXIO;
1223 break;
1224 }
1225
1226 return ret;
1227 }
1228
1229 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1230 {
1231 int ret;
1232
1233 switch (attr->group) {
1234 case KVM_S390_VM_MEM_CTRL:
1235 ret = kvm_s390_get_mem_control(kvm, attr);
1236 break;
1237 case KVM_S390_VM_TOD:
1238 ret = kvm_s390_get_tod(kvm, attr);
1239 break;
1240 case KVM_S390_VM_CPU_MODEL:
1241 ret = kvm_s390_get_cpu_model(kvm, attr);
1242 break;
1243 case KVM_S390_VM_MIGRATION:
1244 ret = kvm_s390_vm_get_migration(kvm, attr);
1245 break;
1246 default:
1247 ret = -ENXIO;
1248 break;
1249 }
1250
1251 return ret;
1252 }
1253
1254 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1255 {
1256 int ret;
1257
1258 switch (attr->group) {
1259 case KVM_S390_VM_MEM_CTRL:
1260 switch (attr->attr) {
1261 case KVM_S390_VM_MEM_ENABLE_CMMA:
1262 case KVM_S390_VM_MEM_CLR_CMMA:
1263 ret = sclp.has_cmma ? 0 : -ENXIO;
1264 break;
1265 case KVM_S390_VM_MEM_LIMIT_SIZE:
1266 ret = 0;
1267 break;
1268 default:
1269 ret = -ENXIO;
1270 break;
1271 }
1272 break;
1273 case KVM_S390_VM_TOD:
1274 switch (attr->attr) {
1275 case KVM_S390_VM_TOD_LOW:
1276 case KVM_S390_VM_TOD_HIGH:
1277 ret = 0;
1278 break;
1279 default:
1280 ret = -ENXIO;
1281 break;
1282 }
1283 break;
1284 case KVM_S390_VM_CPU_MODEL:
1285 switch (attr->attr) {
1286 case KVM_S390_VM_CPU_PROCESSOR:
1287 case KVM_S390_VM_CPU_MACHINE:
1288 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1289 case KVM_S390_VM_CPU_MACHINE_FEAT:
1290 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1291 ret = 0;
1292 break;
1293 /* configuring subfunctions is not supported yet */
1294 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1295 default:
1296 ret = -ENXIO;
1297 break;
1298 }
1299 break;
1300 case KVM_S390_VM_CRYPTO:
1301 switch (attr->attr) {
1302 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1303 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1304 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1305 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1306 ret = 0;
1307 break;
1308 default:
1309 ret = -ENXIO;
1310 break;
1311 }
1312 break;
1313 case KVM_S390_VM_MIGRATION:
1314 ret = 0;
1315 break;
1316 default:
1317 ret = -ENXIO;
1318 break;
1319 }
1320
1321 return ret;
1322 }
1323
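/*
 * Read the storage keys for a range of guest pages and copy them to a
 * user-space buffer (KVM_S390_GET_SKEYS).
 */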
1324 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1325 {
1326 uint8_t *keys;
1327 uint64_t hva;
1328 int i, r = 0;
1329
1330 if (args->flags != 0)
1331 return -EINVAL;
1332
1333 /* Is this guest using storage keys? */
1334 if (!mm_use_skey(current->mm))
1335 return KVM_S390_GET_SKEYS_NONE;
1336
1337 /* Enforce sane limit on memory allocation */
1338 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1339 return -EINVAL;
1340
1341 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1342 if (!keys)
1343 return -ENOMEM;
1344
1345 down_read(&current->mm->mmap_sem);
1346 for (i = 0; i < args->count; i++) {
1347 hva = gfn_to_hva(kvm, args->start_gfn + i);
1348 if (kvm_is_error_hva(hva)) {
1349 r = -EFAULT;
1350 break;
1351 }
1352
1353 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1354 if (r)
1355 break;
1356 }
1357 up_read(&current->mm->mmap_sem);
1358
1359 if (!r) {
1360 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1361 sizeof(uint8_t) * args->count);
1362 if (r)
1363 r = -EFAULT;
1364 }
1365
1366 kvfree(keys);
1367 return r;
1368 }
1369
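/*
 * Write user-supplied storage keys for a range of guest pages, enabling
 * storage key handling for the guest first if necessary (KVM_S390_SET_SKEYS).
 */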
1370 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1371 {
1372 uint8_t *keys;
1373 uint64_t hva;
1374 int i, r = 0;
1375
1376 if (args->flags != 0)
1377 return -EINVAL;
1378
1379 /* Enforce sane limit on memory allocation */
1380 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1381 return -EINVAL;
1382
1383 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1384 if (!keys)
1385 return -ENOMEM;
1386
1387 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1388 sizeof(uint8_t) * args->count);
1389 if (r) {
1390 r = -EFAULT;
1391 goto out;
1392 }
1393
1394 /* Enable storage key handling for the guest */
1395 r = s390_enable_skey();
1396 if (r)
1397 goto out;
1398
1399 down_read(&current->mm->mmap_sem);
1400 for (i = 0; i < args->count; i++) {
1401 hva = gfn_to_hva(kvm, args->start_gfn + i);
1402 if (kvm_is_error_hva(hva)) {
1403 r = -EFAULT;
1404 break;
1405 }
1406
1407 /* Lowest order bit is reserved */
1408 if (keys[i] & 0x01) {
1409 r = -EINVAL;
1410 break;
1411 }
1412
1413 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1414 if (r)
1415 break;
1416 }
1417 up_read(&current->mm->mmap_sem);
1418 out:
1419 kvfree(keys);
1420 return r;
1421 }
1422
1423 /*
1424 * Base address and length must be sent at the start of each block, therefore
1425 * it's cheaper to send some clean data, as long as it's less than the size of
1426 * two longs.
1427 */
1428 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1429 /* for consistency */
1430 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1431
1432 /*
1433 * This function searches for the next page with dirty CMMA attributes, and
1434 * saves the attributes in the buffer up to either the end of the buffer or
1435 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1436 * no trailing clean bytes are saved.
1437 * In case no dirty bits were found, or if CMMA was not enabled or used, the
1438 * output buffer will indicate 0 as length.
1439 */
1440 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1441 struct kvm_s390_cmma_log *args)
1442 {
1443 struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1444 unsigned long bufsize, hva, pgstev, i, next, cur;
1445 int srcu_idx, peek, r = 0, rr;
1446 u8 *res;
1447
1448 cur = args->start_gfn;
1449 i = next = pgstev = 0;
1450
1451 if (unlikely(!kvm->arch.use_cmma))
1452 return -ENXIO;
1453 /* Invalid/unsupported flags were specified */
1454 if (args->flags & ~KVM_S390_CMMA_PEEK)
1455 return -EINVAL;
1456 /* Migration mode query, and we are not doing a migration */
1457 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1458 if (!peek && !s)
1459 return -EINVAL;
1460 /* CMMA is disabled or was not used, or the buffer has length zero */
1461 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1462 if (!bufsize || !kvm->mm->context.use_cmma) {
1463 memset(args, 0, sizeof(*args));
1464 return 0;
1465 }
1466
1467 if (!peek) {
1468 /* We are not peeking, and there are no dirty pages */
1469 if (!atomic64_read(&s->dirty_pages)) {
1470 memset(args, 0, sizeof(*args));
1471 return 0;
1472 }
1473 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1474 args->start_gfn);
1475 if (cur >= s->bitmap_size) /* nothing found, loop back */
1476 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1477 if (cur >= s->bitmap_size) { /* again! (very unlikely) */
1478 memset(args, 0, sizeof(*args));
1479 return 0;
1480 }
1481 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1482 }
1483
1484 res = vmalloc(bufsize);
1485 if (!res)
1486 return -ENOMEM;
1487
1488 args->start_gfn = cur;
1489
1490 down_read(&kvm->mm->mmap_sem);
1491 srcu_idx = srcu_read_lock(&kvm->srcu);
1492 while (i < bufsize) {
1493 hva = gfn_to_hva(kvm, cur);
1494 if (kvm_is_error_hva(hva)) {
1495 r = -EFAULT;
1496 break;
1497 }
1498 /* decrement only if we actually flipped the bit to 0 */
1499 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1500 atomic64_dec(&s->dirty_pages);
1501 r = get_pgste(kvm->mm, hva, &pgstev);
1502 if (r < 0)
1503 pgstev = 0;
1504 /* save the value */
1505 res[i++] = (pgstev >> 24) & 0x3;
1506 /*
1507 * if the next bit is too far away, stop.
1508 * if we reached the previous "next", find the next one
1509 */
1510 if (!peek) {
1511 if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1512 break;
1513 if (cur == next)
1514 next = find_next_bit(s->pgste_bitmap,
1515 s->bitmap_size, cur + 1);
1516 /* reached the end of the bitmap or of the buffer, stop */
1517 if ((next >= s->bitmap_size) ||
1518 (next >= args->start_gfn + bufsize))
1519 break;
1520 }
1521 cur++;
1522 }
1523 srcu_read_unlock(&kvm->srcu, srcu_idx);
1524 up_read(&kvm->mm->mmap_sem);
1525 args->count = i;
1526 args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1527
1528 rr = copy_to_user((void __user *)args->values, res, args->count);
1529 if (rr)
1530 r = -EFAULT;
1531
1532 vfree(res);
1533 return r;
1534 }
1535
1536 /*
1537 * This function sets the CMMA attributes for the given pages. If the input
1538 * buffer has zero length, no action is taken, otherwise the attributes are
1539 * set and the mm->context.use_cmma flag is set.
1540 */
1541 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1542 const struct kvm_s390_cmma_log *args)
1543 {
1544 unsigned long hva, mask, pgstev, i;
1545 uint8_t *bits;
1546 int srcu_idx, r = 0;
1547
1548 mask = args->mask;
1549
1550 if (!kvm->arch.use_cmma)
1551 return -ENXIO;
1552 /* invalid/unsupported flags */
1553 if (args->flags != 0)
1554 return -EINVAL;
1555 /* Enforce sane limit on memory allocation */
1556 if (args->count > KVM_S390_CMMA_SIZE_MAX)
1557 return -EINVAL;
1558 /* Nothing to do */
1559 if (args->count == 0)
1560 return 0;
1561
1562 bits = vmalloc(sizeof(*bits) * args->count);
1563 if (!bits)
1564 return -ENOMEM;
1565
1566 r = copy_from_user(bits, (void __user *)args->values, args->count);
1567 if (r) {
1568 r = -EFAULT;
1569 goto out;
1570 }
1571
1572 down_read(&kvm->mm->mmap_sem);
1573 srcu_idx = srcu_read_lock(&kvm->srcu);
1574 for (i = 0; i < args->count; i++) {
1575 hva = gfn_to_hva(kvm, args->start_gfn + i);
1576 if (kvm_is_error_hva(hva)) {
1577 r = -EFAULT;
1578 break;
1579 }
1580
1581 pgstev = bits[i];
1582 pgstev = pgstev << 24;
1583 mask &= _PGSTE_GPS_USAGE_MASK;
1584 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1585 }
1586 srcu_read_unlock(&kvm->srcu, srcu_idx);
1587 up_read(&kvm->mm->mmap_sem);
1588
1589 if (!kvm->mm->context.use_cmma) {
1590 down_write(&kvm->mm->mmap_sem);
1591 kvm->mm->context.use_cmma = 1;
1592 up_write(&kvm->mm->mmap_sem);
1593 }
1594 out:
1595 vfree(bits);
1596 return r;
1597 }
1598
1599 long kvm_arch_vm_ioctl(struct file *filp,
1600 unsigned int ioctl, unsigned long arg)
1601 {
1602 struct kvm *kvm = filp->private_data;
1603 void __user *argp = (void __user *)arg;
1604 struct kvm_device_attr attr;
1605 int r;
1606
1607 switch (ioctl) {
1608 case KVM_S390_INTERRUPT: {
1609 struct kvm_s390_interrupt s390int;
1610
1611 r = -EFAULT;
1612 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1613 break;
1614 r = kvm_s390_inject_vm(kvm, &s390int);
1615 break;
1616 }
1617 case KVM_ENABLE_CAP: {
1618 struct kvm_enable_cap cap;
1619 r = -EFAULT;
1620 if (copy_from_user(&cap, argp, sizeof(cap)))
1621 break;
1622 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1623 break;
1624 }
1625 case KVM_CREATE_IRQCHIP: {
1626 struct kvm_irq_routing_entry routing;
1627
1628 r = -EINVAL;
1629 if (kvm->arch.use_irqchip) {
1630 /* Set up dummy routing. */
1631 memset(&routing, 0, sizeof(routing));
1632 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1633 }
1634 break;
1635 }
1636 case KVM_SET_DEVICE_ATTR: {
1637 r = -EFAULT;
1638 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1639 break;
1640 r = kvm_s390_vm_set_attr(kvm, &attr);
1641 break;
1642 }
1643 case KVM_GET_DEVICE_ATTR: {
1644 r = -EFAULT;
1645 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1646 break;
1647 r = kvm_s390_vm_get_attr(kvm, &attr);
1648 break;
1649 }
1650 case KVM_HAS_DEVICE_ATTR: {
1651 r = -EFAULT;
1652 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1653 break;
1654 r = kvm_s390_vm_has_attr(kvm, &attr);
1655 break;
1656 }
1657 case KVM_S390_GET_SKEYS: {
1658 struct kvm_s390_skeys args;
1659
1660 r = -EFAULT;
1661 if (copy_from_user(&args, argp,
1662 sizeof(struct kvm_s390_skeys)))
1663 break;
1664 r = kvm_s390_get_skeys(kvm, &args);
1665 break;
1666 }
1667 case KVM_S390_SET_SKEYS: {
1668 struct kvm_s390_skeys args;
1669
1670 r = -EFAULT;
1671 if (copy_from_user(&args, argp,
1672 sizeof(struct kvm_s390_skeys)))
1673 break;
1674 r = kvm_s390_set_skeys(kvm, &args);
1675 break;
1676 }
1677 case KVM_S390_GET_CMMA_BITS: {
1678 struct kvm_s390_cmma_log args;
1679
1680 r = -EFAULT;
1681 if (copy_from_user(&args, argp, sizeof(args)))
1682 break;
1683 r = kvm_s390_get_cmma_bits(kvm, &args);
1684 if (!r) {
1685 r = copy_to_user(argp, &args, sizeof(args));
1686 if (r)
1687 r = -EFAULT;
1688 }
1689 break;
1690 }
1691 case KVM_S390_SET_CMMA_BITS: {
1692 struct kvm_s390_cmma_log args;
1693
1694 r = -EFAULT;
1695 if (copy_from_user(&args, argp, sizeof(args)))
1696 break;
1697 r = kvm_s390_set_cmma_bits(kvm, &args);
1698 break;
1699 }
1700 default:
1701 r = -ENOTTY;
1702 }
1703
1704 return r;
1705 }
1706
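/*
 * Query the AP (adjunct processor) configuration via PQAP(QCI) into a
 * 128-byte buffer. Returns the condition code of the PQAP instruction.
 */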
1707 static int kvm_s390_query_ap_config(u8 *config)
1708 {
1709 u32 fcn_code = 0x04000000UL;
1710 u32 cc = 0;
1711
1712 memset(config, 0, 128);
1713 asm volatile(
1714 "lgr 0,%1\n"
1715 "lgr 2,%2\n"
1716 ".long 0xb2af0000\n" /* PQAP(QCI) */
1717 "0: ipm %0\n"
1718 "srl %0,28\n"
1719 "1:\n"
1720 EX_TABLE(0b, 1b)
1721 : "+r" (cc)
1722 : "r" (fcn_code), "r" (config)
1723 : "cc", "0", "2", "memory"
1724 );
1725
1726 return cc;
1727 }
1728
1729 static int kvm_s390_apxa_installed(void)
1730 {
1731 u8 config[128];
1732 int cc;
1733
1734 if (test_facility(12)) {
1735 cc = kvm_s390_query_ap_config(config);
1736
1737 if (cc)
1738 pr_err("PQAP(QCI) failed with cc=%d", cc);
1739 else
1740 return config[0] & 0x40;
1741 }
1742
1743 return 0;
1744 }
1745
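/* Select the CRYCB format: format 2 if APXA is installed, format 1 otherwise. */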
1746 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1747 {
1748 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1749
1750 if (kvm_s390_apxa_installed())
1751 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1752 else
1753 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1754 }
1755
1756 static u64 kvm_s390_get_initial_cpuid(void)
1757 {
1758 struct cpuid cpuid;
1759
1760 get_cpu_id(&cpuid);
1761 cpuid.version = 0xff;
1762 return *((u64 *) &cpuid);
1763 }
1764
1765 static void kvm_s390_crypto_init(struct kvm *kvm)
1766 {
1767 if (!test_kvm_facility(kvm, 76))
1768 return;
1769
1770 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1771 kvm_s390_set_crycb_format(kvm);
1772
1773 /* Enable AES/DEA protected key functions by default */
1774 kvm->arch.crypto.aes_kw = 1;
1775 kvm->arch.crypto.dea_kw = 1;
1776 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1777 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1778 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1779 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1780 }
1781
1782 static void sca_dispose(struct kvm *kvm)
1783 {
1784 if (kvm->arch.use_esca)
1785 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1786 else
1787 free_page((unsigned long)(kvm->arch.sca));
1788 kvm->arch.sca = NULL;
1789 }
1790
1791 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1792 {
1793 gfp_t alloc_flags = GFP_KERNEL;
1794 int i, rc;
1795 char debug_name[16];
1796 static unsigned long sca_offset;
1797
1798 rc = -EINVAL;
1799 #ifdef CONFIG_KVM_S390_UCONTROL
1800 if (type & ~KVM_VM_S390_UCONTROL)
1801 goto out_err;
1802 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1803 goto out_err;
1804 #else
1805 if (type)
1806 goto out_err;
1807 #endif
1808
1809 rc = s390_enable_sie();
1810 if (rc)
1811 goto out_err;
1812
1813 rc = -ENOMEM;
1814
1815 ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1816
1817 kvm->arch.use_esca = 0; /* start with basic SCA */
1818 if (!sclp.has_64bscao)
1819 alloc_flags |= GFP_DMA;
1820 rwlock_init(&kvm->arch.sca_lock);
1821 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1822 if (!kvm->arch.sca)
1823 goto out_err;
1824 spin_lock(&kvm_lock);
1825 sca_offset += 16;
1826 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1827 sca_offset = 0;
1828 kvm->arch.sca = (struct bsca_block *)
1829 ((char *) kvm->arch.sca + sca_offset);
1830 spin_unlock(&kvm_lock);
1831
1832 sprintf(debug_name, "kvm-%u", current->pid);
1833
1834 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1835 if (!kvm->arch.dbf)
1836 goto out_err;
1837
1838 kvm->arch.sie_page2 =
1839 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1840 if (!kvm->arch.sie_page2)
1841 goto out_err;
1842
1843 /* Populate the facility mask initially. */
1844 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1845 sizeof(S390_lowcore.stfle_fac_list));
1846 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1847 if (i < kvm_s390_fac_list_mask_size())
1848 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1849 else
1850 kvm->arch.model.fac_mask[i] = 0UL;
1851 }
1852
1853 /* Populate the facility list initially. */
1854 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1855 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1856 S390_ARCH_FAC_LIST_SIZE_BYTE);
1857
1858 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1859 set_kvm_facility(kvm->arch.model.fac_list, 74);
1860
1861 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1862 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1863
1864 kvm_s390_crypto_init(kvm);
1865
1866 mutex_init(&kvm->arch.float_int.ais_lock);
1867 kvm->arch.float_int.simm = 0;
1868 kvm->arch.float_int.nimm = 0;
1869 kvm->arch.float_int.ais_enabled = 0;
1870 spin_lock_init(&kvm->arch.float_int.lock);
1871 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1872 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1873 init_waitqueue_head(&kvm->arch.ipte_wq);
1874 mutex_init(&kvm->arch.ipte_mutex);
1875
1876 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1877 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1878
1879 if (type & KVM_VM_S390_UCONTROL) {
1880 kvm->arch.gmap = NULL;
1881 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1882 } else {
1883 if (sclp.hamax == U64_MAX)
1884 kvm->arch.mem_limit = TASK_SIZE_MAX;
1885 else
1886 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1887 sclp.hamax + 1);
1888 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1889 if (!kvm->arch.gmap)
1890 goto out_err;
1891 kvm->arch.gmap->private = kvm;
1892 kvm->arch.gmap->pfault_enabled = 0;
1893 }
1894
1895 kvm->arch.css_support = 0;
1896 kvm->arch.use_irqchip = 0;
1897 kvm->arch.epoch = 0;
1898
1899 spin_lock_init(&kvm->arch.start_stop_lock);
1900 kvm_s390_vsie_init(kvm);
1901 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1902
1903 return 0;
1904 out_err:
1905 free_page((unsigned long)kvm->arch.sie_page2);
1906 debug_unregister(kvm->arch.dbf);
1907 sca_dispose(kvm);
1908 KVM_EVENT(3, "creation of vm failed: %d", rc);
1909 return rc;
1910 }
1911
1912 bool kvm_arch_has_vcpu_debugfs(void)
1913 {
1914 return false;
1915 }
1916
1917 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1918 {
1919 return 0;
1920 }
1921
1922 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1923 {
1924 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1925 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1926 kvm_s390_clear_local_irqs(vcpu);
1927 kvm_clear_async_pf_completion_queue(vcpu);
1928 if (!kvm_is_ucontrol(vcpu->kvm))
1929 sca_del_vcpu(vcpu);
1930
1931 if (kvm_is_ucontrol(vcpu->kvm))
1932 gmap_remove(vcpu->arch.gmap);
1933
1934 if (vcpu->kvm->arch.use_cmma)
1935 kvm_s390_vcpu_unsetup_cmma(vcpu);
1936 free_page((unsigned long)(vcpu->arch.sie_block));
1937
1938 kvm_vcpu_uninit(vcpu);
1939 kmem_cache_free(kvm_vcpu_cache, vcpu);
1940 }
1941
1942 static void kvm_free_vcpus(struct kvm *kvm)
1943 {
1944 unsigned int i;
1945 struct kvm_vcpu *vcpu;
1946
1947 kvm_for_each_vcpu(i, vcpu, kvm)
1948 kvm_arch_vcpu_destroy(vcpu);
1949
1950 mutex_lock(&kvm->lock);
1951 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1952 kvm->vcpus[i] = NULL;
1953
1954 atomic_set(&kvm->online_vcpus, 0);
1955 mutex_unlock(&kvm->lock);
1956 }
1957
1958 void kvm_arch_destroy_vm(struct kvm *kvm)
1959 {
1960 kvm_free_vcpus(kvm);
1961 sca_dispose(kvm);
1962 debug_unregister(kvm->arch.dbf);
1963 free_page((unsigned long)kvm->arch.sie_page2);
1964 if (!kvm_is_ucontrol(kvm))
1965 gmap_remove(kvm->arch.gmap);
1966 kvm_s390_destroy_adapters(kvm);
1967 kvm_s390_clear_float_irqs(kvm);
1968 kvm_s390_vsie_destroy(kvm);
1969 if (kvm->arch.migration_state) {
1970 vfree(kvm->arch.migration_state->pgste_bitmap);
1971 kfree(kvm->arch.migration_state);
1972 }
1973 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1974 }
1975
1976 /* Section: vcpu related */
1977 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1978 {
1979 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1980 if (!vcpu->arch.gmap)
1981 return -ENOMEM;
1982 vcpu->arch.gmap->private = vcpu->kvm;
1983
1984 return 0;
1985 }
1986
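/*
 * Each vCPU owns one entry in the VM's SCA (system control area): the
 * sda field points to its SIE control block and the mcn bitmap marks
 * which entries are valid.  Two layouts exist, the basic bsca_block and
 * the extended esca_block with more vCPU slots; the helpers below take
 * arch.sca_lock so they cannot race with a switch between the formats.
 */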
1987 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1988 {
1989 if (!kvm_s390_use_sca_entries())
1990 return;
1991 read_lock(&vcpu->kvm->arch.sca_lock);
1992 if (vcpu->kvm->arch.use_esca) {
1993 struct esca_block *sca = vcpu->kvm->arch.sca;
1994
1995 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1996 sca->cpu[vcpu->vcpu_id].sda = 0;
1997 } else {
1998 struct bsca_block *sca = vcpu->kvm->arch.sca;
1999
2000 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2001 sca->cpu[vcpu->vcpu_id].sda = 0;
2002 }
2003 read_unlock(&vcpu->kvm->arch.sca_lock);
2004 }
2005
2006 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2007 {
2008 if (!kvm_s390_use_sca_entries()) {
2009 struct bsca_block *sca = vcpu->kvm->arch.sca;
2010
2011 /* we still need the basic sca for the ipte control */
2012 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2013 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2014 }
2015 read_lock(&vcpu->kvm->arch.sca_lock);
2016 if (vcpu->kvm->arch.use_esca) {
2017 struct esca_block *sca = vcpu->kvm->arch.sca;
2018
2019 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2020 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2021 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2022 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2023 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2024 } else {
2025 struct bsca_block *sca = vcpu->kvm->arch.sca;
2026
2027 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2028 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2029 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2030 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2031 }
2032 read_unlock(&vcpu->kvm->arch.sca_lock);
2033 }
2034
2035 /* Basic SCA to Extended SCA data copy routines */
2036 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2037 {
2038 d->sda = s->sda;
2039 d->sigp_ctrl.c = s->sigp_ctrl.c;
2040 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2041 }
2042
2043 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2044 {
2045 int i;
2046
2047 d->ipte_control = s->ipte_control;
2048 d->mcn[0] = s->mcn;
2049 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2050 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2051 }
2052
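/*
 * Switch the VM from the basic to the extended SCA so that vCPU ids
 * beyond KVM_S390_BSCA_CPU_SLOTS become usable: block all vCPUs and
 * kick them out of SIE, copy the entries under the sca_lock write lock,
 * point every SIE block at the new origin, and only then free the old
 * basic SCA.
 */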
2053 static int sca_switch_to_extended(struct kvm *kvm)
2054 {
2055 struct bsca_block *old_sca = kvm->arch.sca;
2056 struct esca_block *new_sca;
2057 struct kvm_vcpu *vcpu;
2058 unsigned int vcpu_idx;
2059 u32 scaol, scaoh;
2060
2061 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2062 if (!new_sca)
2063 return -ENOMEM;
2064
2065 scaoh = (u32)((u64)(new_sca) >> 32);
2066 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2067
2068 kvm_s390_vcpu_block_all(kvm);
2069 write_lock(&kvm->arch.sca_lock);
2070
2071 sca_copy_b_to_e(new_sca, old_sca);
2072
2073 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2074 vcpu->arch.sie_block->scaoh = scaoh;
2075 vcpu->arch.sie_block->scaol = scaol;
2076 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2077 }
2078 kvm->arch.sca = new_sca;
2079 kvm->arch.use_esca = 1;
2080
2081 write_unlock(&kvm->arch.sca_lock);
2082 kvm_s390_vcpu_unblock_all(kvm);
2083
2084 free_page((unsigned long)old_sca);
2085
2086 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2087 old_sca, kvm->arch.sca);
2088 return 0;
2089 }
2090
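/*
 * Check whether a vCPU with the given id can be represented in the SCA.
 * Ids below KVM_S390_BSCA_CPU_SLOTS always fit; larger ids require the
 * ESCA and 64-bit-SCA-origin facilities and trigger a one-time switch
 * to the extended format.
 */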
2091 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2092 {
2093 int rc;
2094
2095 if (!kvm_s390_use_sca_entries()) {
2096 if (id < KVM_MAX_VCPUS)
2097 return true;
2098 return false;
2099 }
2100 if (id < KVM_S390_BSCA_CPU_SLOTS)
2101 return true;
2102 if (!sclp.has_esca || !sclp.has_64bscao)
2103 return false;
2104
2105 mutex_lock(&kvm->lock);
2106 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2107 mutex_unlock(&kvm->lock);
2108
2109 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2110 }
2111
2112 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2113 {
2114 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2115 kvm_clear_async_pf_completion_queue(vcpu);
2116 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2117 KVM_SYNC_GPRS |
2118 KVM_SYNC_ACRS |
2119 KVM_SYNC_CRS |
2120 KVM_SYNC_ARCH0 |
2121 KVM_SYNC_PFAULT;
2122 kvm_s390_set_prefix(vcpu, 0);
2123 if (test_kvm_facility(vcpu->kvm, 64))
2124 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2125 if (test_kvm_facility(vcpu->kvm, 133))
2126 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2127 /* fprs can be synchronized via vrs, even if the guest has no vx. With
2128 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2129 */
2130 if (MACHINE_HAS_VX)
2131 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2132 else
2133 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2134
2135 if (kvm_is_ucontrol(vcpu->kvm))
2136 return __kvm_ucontrol_vcpu_init(vcpu);
2137
2138 return 0;
2139 }
2140
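/*
 * CPU timer accounting: while accounting is enabled, the value in the
 * SIE block is only a snapshot; the TOD clock at the last start is kept
 * in cputm_start and the elapsed delta is subtracted on stop or on read.
 * cputm_seqcount lets kvm_s390_get_cpu_timer() compute a consistent
 * value from other threads without taking a lock.
 */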
2141 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2142 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2143 {
2144 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2145 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2146 vcpu->arch.cputm_start = get_tod_clock_fast();
2147 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2148 }
2149
2150 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2151 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2152 {
2153 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2154 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2155 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2156 vcpu->arch.cputm_start = 0;
2157 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2158 }
2159
2160 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2161 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2162 {
2163 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2164 vcpu->arch.cputm_enabled = true;
2165 __start_cpu_timer_accounting(vcpu);
2166 }
2167
2168 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2169 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2170 {
2171 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2172 __stop_cpu_timer_accounting(vcpu);
2173 vcpu->arch.cputm_enabled = false;
2174 }
2175
2176 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2177 {
2178 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2179 __enable_cpu_timer_accounting(vcpu);
2180 preempt_enable();
2181 }
2182
2183 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2184 {
2185 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2186 __disable_cpu_timer_accounting(vcpu);
2187 preempt_enable();
2188 }
2189
2190 /* set the cpu timer - may only be called from the VCPU thread itself */
2191 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2192 {
2193 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2194 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2195 if (vcpu->arch.cputm_enabled)
2196 vcpu->arch.cputm_start = get_tod_clock_fast();
2197 vcpu->arch.sie_block->cputm = cputm;
2198 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2199 preempt_enable();
2200 }
2201
2202 /* update and get the cpu timer - can also be called from other VCPU threads */
2203 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2204 {
2205 unsigned int seq;
2206 __u64 value;
2207
2208 if (unlikely(!vcpu->arch.cputm_enabled))
2209 return vcpu->arch.sie_block->cputm;
2210
2211 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2212 do {
2213 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2214 /*
2215 * If the writer would ever execute a read in the critical
2216 * section, e.g. in irq context, we have a deadlock.
2217 */
2218 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2219 value = vcpu->arch.sie_block->cputm;
2220 /* if cputm_start is 0, accounting is being started/stopped */
2221 if (likely(vcpu->arch.cputm_start))
2222 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2223 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2224 preempt_enable();
2225 return value;
2226 }
2227
2228 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2229 {
2230
2231 gmap_enable(vcpu->arch.enabled_gmap);
2232 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2233 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2234 __start_cpu_timer_accounting(vcpu);
2235 vcpu->cpu = cpu;
2236 }
2237
2238 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2239 {
2240 vcpu->cpu = -1;
2241 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2242 __stop_cpu_timer_accounting(vcpu);
2243 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2244 vcpu->arch.enabled_gmap = gmap_get_enabled();
2245 gmap_disable(vcpu->arch.enabled_gmap);
2246
2247 }
2248
2249 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2250 {
2251 /* this equals the initial cpu reset in the POP, but we don't switch to ESA */
2252 vcpu->arch.sie_block->gpsw.mask = 0UL;
2253 vcpu->arch.sie_block->gpsw.addr = 0UL;
2254 kvm_s390_set_prefix(vcpu, 0);
2255 kvm_s390_set_cpu_timer(vcpu, 0);
2256 vcpu->arch.sie_block->ckc = 0UL;
2257 vcpu->arch.sie_block->todpr = 0;
2258 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2259 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
2260 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2261 /* make sure the new fpc will be lazily loaded */
2262 save_fpu_regs();
2263 current->thread.fpu.fpc = 0;
2264 vcpu->arch.sie_block->gbea = 1;
2265 vcpu->arch.sie_block->pp = 0;
2266 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2267 kvm_clear_async_pf_completion_queue(vcpu);
2268 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2269 kvm_s390_vcpu_stop(vcpu);
2270 kvm_s390_clear_local_irqs(vcpu);
2271 }
2272
2273 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2274 {
2275 mutex_lock(&vcpu->kvm->lock);
2276 preempt_disable();
2277 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2278 preempt_enable();
2279 mutex_unlock(&vcpu->kvm->lock);
2280 if (!kvm_is_ucontrol(vcpu->kvm)) {
2281 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2282 sca_add_vcpu(vcpu);
2283 }
2284 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2285 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2286 /* make vcpu_load load the right gmap on the first trigger */
2287 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2288 }
2289
2290 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2291 {
2292 if (!test_kvm_facility(vcpu->kvm, 76))
2293 return;
2294
2295 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2296
2297 if (vcpu->kvm->arch.crypto.aes_kw)
2298 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2299 if (vcpu->kvm->arch.crypto.dea_kw)
2300 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2301
2302 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2303 }
2304
2305 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2306 {
2307 free_page(vcpu->arch.sie_block->cbrlo);
2308 vcpu->arch.sie_block->cbrlo = 0;
2309 }
2310
2311 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2312 {
2313 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2314 if (!vcpu->arch.sie_block->cbrlo)
2315 return -ENOMEM;
2316
2317 vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2318 return 0;
2319 }
2320
2321 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2322 {
2323 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2324
2325 vcpu->arch.sie_block->ibc = model->ibc;
2326 if (test_kvm_facility(vcpu->kvm, 7))
2327 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2328 }
2329
2330 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2331 {
2332 int rc = 0;
2333
2334 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2335 CPUSTAT_SM |
2336 CPUSTAT_STOPPED);
2337
2338 if (test_kvm_facility(vcpu->kvm, 78))
2339 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2340 else if (test_kvm_facility(vcpu->kvm, 8))
2341 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2342
2343 kvm_s390_vcpu_setup_model(vcpu);
2344
2345 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2346 if (MACHINE_HAS_ESOP)
2347 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2348 if (test_kvm_facility(vcpu->kvm, 9))
2349 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2350 if (test_kvm_facility(vcpu->kvm, 73))
2351 vcpu->arch.sie_block->ecb |= ECB_TE;
2352
2353 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2354 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2355 if (test_kvm_facility(vcpu->kvm, 130))
2356 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2357 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2358 if (sclp.has_cei)
2359 vcpu->arch.sie_block->eca |= ECA_CEI;
2360 if (sclp.has_ib)
2361 vcpu->arch.sie_block->eca |= ECA_IB;
2362 if (sclp.has_siif)
2363 vcpu->arch.sie_block->eca |= ECA_SII;
2364 if (sclp.has_sigpif)
2365 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2366 if (test_kvm_facility(vcpu->kvm, 129)) {
2367 vcpu->arch.sie_block->eca |= ECA_VX;
2368 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2369 }
2370 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2371 | SDNXC;
2372 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2373
2374 if (sclp.has_kss)
2375 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2376 else
2377 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2378
2379 if (vcpu->kvm->arch.use_cmma) {
2380 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2381 if (rc)
2382 return rc;
2383 }
2384 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2385 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2386
2387 kvm_s390_vcpu_crypto_setup(vcpu);
2388
2389 return rc;
2390 }
2391
2392 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2393 unsigned int id)
2394 {
2395 struct kvm_vcpu *vcpu;
2396 struct sie_page *sie_page;
2397 int rc = -EINVAL;
2398
2399 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2400 goto out;
2401
2402 rc = -ENOMEM;
2403
2404 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2405 if (!vcpu)
2406 goto out;
2407
2408 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2409 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2410 if (!sie_page)
2411 goto out_free_cpu;
2412
2413 vcpu->arch.sie_block = &sie_page->sie_block;
2414 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2415
2416 /* the real guest size will always be smaller than msl */
2417 vcpu->arch.sie_block->mso = 0;
2418 vcpu->arch.sie_block->msl = sclp.hamax;
2419
2420 vcpu->arch.sie_block->icpua = id;
2421 spin_lock_init(&vcpu->arch.local_int.lock);
2422 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2423 vcpu->arch.local_int.wq = &vcpu->wq;
2424 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2425 seqcount_init(&vcpu->arch.cputm_seqcount);
2426
2427 rc = kvm_vcpu_init(vcpu, kvm, id);
2428 if (rc)
2429 goto out_free_sie_block;
2430 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2431 vcpu->arch.sie_block);
2432 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2433
2434 return vcpu;
2435 out_free_sie_block:
2436 free_page((unsigned long)(vcpu->arch.sie_block));
2437 out_free_cpu:
2438 kmem_cache_free(kvm_vcpu_cache, vcpu);
2439 out:
2440 return ERR_PTR(rc);
2441 }
2442
2443 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2444 {
2445 return kvm_s390_vcpu_has_irq(vcpu, 0);
2446 }
2447
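/*
 * Blocking and request handling both set a bit in the SIE program
 * parameter area (prog20) and kick the vCPU out of SIE via exit_sie();
 * the SIE entry path does not reenter the guest while such a bit is
 * set, until kvm_s390_vcpu_unblock() or kvm_s390_vcpu_request_handled()
 * clears it again.
 */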
2448 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2449 {
2450 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2451 exit_sie(vcpu);
2452 }
2453
2454 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2455 {
2456 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2457 }
2458
2459 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2460 {
2461 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2462 exit_sie(vcpu);
2463 }
2464
2465 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2466 {
2467 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2468 }
2469
2470 /*
2471 * Kick a guest cpu out of SIE and wait until SIE is not running.
2472 * If the CPU is not running (e.g. waiting as idle) the function will
2473 * return immediately. */
2474 void exit_sie(struct kvm_vcpu *vcpu)
2475 {
2476 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2477 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2478 cpu_relax();
2479 }
2480
2481 /* Kick a guest cpu out of SIE to process a request synchronously */
2482 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2483 {
2484 kvm_make_request(req, vcpu);
2485 kvm_s390_vcpu_request(vcpu);
2486 }
2487
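/*
 * gmap notifier: invoked when host mappings backing guest pages are
 * invalidated.  Only the two pages forming a vCPU's prefix area matter
 * here; affected vCPUs get KVM_REQ_MMU_RELOAD so that the prefix
 * mapping is re-established before the next guest entry.
 */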
2488 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2489 unsigned long end)
2490 {
2491 struct kvm *kvm = gmap->private;
2492 struct kvm_vcpu *vcpu;
2493 unsigned long prefix;
2494 int i;
2495
2496 if (gmap_is_shadow(gmap))
2497 return;
2498 if (start >= 1UL << 31)
2499 /* We are only interested in prefix pages */
2500 return;
2501 kvm_for_each_vcpu(i, vcpu, kvm) {
2502 /* match against both prefix pages */
2503 prefix = kvm_s390_get_prefix(vcpu);
2504 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2505 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2506 start, end);
2507 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2508 }
2509 }
2510 }
2511
2512 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2513 {
2514 /* kvm common code refers to this, but never calls it */
2515 BUG();
2516 return 0;
2517 }
2518
2519 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2520 struct kvm_one_reg *reg)
2521 {
2522 int r = -EINVAL;
2523
2524 switch (reg->id) {
2525 case KVM_REG_S390_TODPR:
2526 r = put_user(vcpu->arch.sie_block->todpr,
2527 (u32 __user *)reg->addr);
2528 break;
2529 case KVM_REG_S390_EPOCHDIFF:
2530 r = put_user(vcpu->arch.sie_block->epoch,
2531 (u64 __user *)reg->addr);
2532 break;
2533 case KVM_REG_S390_CPU_TIMER:
2534 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2535 (u64 __user *)reg->addr);
2536 break;
2537 case KVM_REG_S390_CLOCK_COMP:
2538 r = put_user(vcpu->arch.sie_block->ckc,
2539 (u64 __user *)reg->addr);
2540 break;
2541 case KVM_REG_S390_PFTOKEN:
2542 r = put_user(vcpu->arch.pfault_token,
2543 (u64 __user *)reg->addr);
2544 break;
2545 case KVM_REG_S390_PFCOMPARE:
2546 r = put_user(vcpu->arch.pfault_compare,
2547 (u64 __user *)reg->addr);
2548 break;
2549 case KVM_REG_S390_PFSELECT:
2550 r = put_user(vcpu->arch.pfault_select,
2551 (u64 __user *)reg->addr);
2552 break;
2553 case KVM_REG_S390_PP:
2554 r = put_user(vcpu->arch.sie_block->pp,
2555 (u64 __user *)reg->addr);
2556 break;
2557 case KVM_REG_S390_GBEA:
2558 r = put_user(vcpu->arch.sie_block->gbea,
2559 (u64 __user *)reg->addr);
2560 break;
2561 default:
2562 break;
2563 }
2564
2565 return r;
2566 }
2567
2568 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2569 struct kvm_one_reg *reg)
2570 {
2571 int r = -EINVAL;
2572 __u64 val;
2573
2574 switch (reg->id) {
2575 case KVM_REG_S390_TODPR:
2576 r = get_user(vcpu->arch.sie_block->todpr,
2577 (u32 __user *)reg->addr);
2578 break;
2579 case KVM_REG_S390_EPOCHDIFF:
2580 r = get_user(vcpu->arch.sie_block->epoch,
2581 (u64 __user *)reg->addr);
2582 break;
2583 case KVM_REG_S390_CPU_TIMER:
2584 r = get_user(val, (u64 __user *)reg->addr);
2585 if (!r)
2586 kvm_s390_set_cpu_timer(vcpu, val);
2587 break;
2588 case KVM_REG_S390_CLOCK_COMP:
2589 r = get_user(vcpu->arch.sie_block->ckc,
2590 (u64 __user *)reg->addr);
2591 break;
2592 case KVM_REG_S390_PFTOKEN:
2593 r = get_user(vcpu->arch.pfault_token,
2594 (u64 __user *)reg->addr);
2595 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2596 kvm_clear_async_pf_completion_queue(vcpu);
2597 break;
2598 case KVM_REG_S390_PFCOMPARE:
2599 r = get_user(vcpu->arch.pfault_compare,
2600 (u64 __user *)reg->addr);
2601 break;
2602 case KVM_REG_S390_PFSELECT:
2603 r = get_user(vcpu->arch.pfault_select,
2604 (u64 __user *)reg->addr);
2605 break;
2606 case KVM_REG_S390_PP:
2607 r = get_user(vcpu->arch.sie_block->pp,
2608 (u64 __user *)reg->addr);
2609 break;
2610 case KVM_REG_S390_GBEA:
2611 r = get_user(vcpu->arch.sie_block->gbea,
2612 (u64 __user *)reg->addr);
2613 break;
2614 default:
2615 break;
2616 }
2617
2618 return r;
2619 }
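/*
 * Illustrative sketch (not part of this file): userspace reaches the two
 * handlers above through the generic ONE_REG interface, roughly like
 * this, where vcpu_fd is a placeholder for a KVM_CREATE_VCPU fd:
 *
 *	__u64 cputm;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&cputm,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */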
2620
2621 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2622 {
2623 kvm_s390_vcpu_initial_reset(vcpu);
2624 return 0;
2625 }
2626
2627 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2628 {
2629 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2630 return 0;
2631 }
2632
2633 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2634 {
2635 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2636 return 0;
2637 }
2638
2639 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2640 struct kvm_sregs *sregs)
2641 {
2642 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2643 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2644 return 0;
2645 }
2646
2647 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2648 struct kvm_sregs *sregs)
2649 {
2650 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2651 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2652 return 0;
2653 }
2654
2655 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2656 {
2657 if (test_fp_ctl(fpu->fpc))
2658 return -EINVAL;
2659 vcpu->run->s.regs.fpc = fpu->fpc;
2660 if (MACHINE_HAS_VX)
2661 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2662 (freg_t *) fpu->fprs);
2663 else
2664 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2665 return 0;
2666 }
2667
2668 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2669 {
2670 /* make sure we have the latest values */
2671 save_fpu_regs();
2672 if (MACHINE_HAS_VX)
2673 convert_vx_to_fp((freg_t *) fpu->fprs,
2674 (__vector128 *) vcpu->run->s.regs.vrs);
2675 else
2676 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2677 fpu->fpc = vcpu->run->s.regs.fpc;
2678 return 0;
2679 }
2680
2681 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2682 {
2683 int rc = 0;
2684
2685 if (!is_vcpu_stopped(vcpu))
2686 rc = -EBUSY;
2687 else {
2688 vcpu->run->psw_mask = psw.mask;
2689 vcpu->run->psw_addr = psw.addr;
2690 }
2691 return rc;
2692 }
2693
2694 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2695 struct kvm_translation *tr)
2696 {
2697 return -EINVAL; /* not implemented yet */
2698 }
2699
2700 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2701 KVM_GUESTDBG_USE_HW_BP | \
2702 KVM_GUESTDBG_ENABLE)
2703
2704 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2705 struct kvm_guest_debug *dbg)
2706 {
2707 int rc = 0;
2708
2709 vcpu->guest_debug = 0;
2710 kvm_s390_clear_bp_data(vcpu);
2711
2712 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2713 return -EINVAL;
2714 if (!sclp.has_gpere)
2715 return -EINVAL;
2716
2717 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2718 vcpu->guest_debug = dbg->control;
2719 /* enforce guest PER */
2720 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2721
2722 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2723 rc = kvm_s390_import_bp_data(vcpu, dbg);
2724 } else {
2725 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2726 vcpu->arch.guestdbg.last_bp = 0;
2727 }
2728
2729 if (rc) {
2730 vcpu->guest_debug = 0;
2731 kvm_s390_clear_bp_data(vcpu);
2732 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2733 }
2734
2735 return rc;
2736 }
2737
2738 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2739 struct kvm_mp_state *mp_state)
2740 {
2741 /* CHECK_STOP and LOAD are not supported yet */
2742 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2743 KVM_MP_STATE_OPERATING;
2744 }
2745
2746 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2747 struct kvm_mp_state *mp_state)
2748 {
2749 int rc = 0;
2750
2751 /* user space knows about this interface - let it control the state */
2752 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2753
2754 switch (mp_state->mp_state) {
2755 case KVM_MP_STATE_STOPPED:
2756 kvm_s390_vcpu_stop(vcpu);
2757 break;
2758 case KVM_MP_STATE_OPERATING:
2759 kvm_s390_vcpu_start(vcpu);
2760 break;
2761 case KVM_MP_STATE_LOAD:
2762 case KVM_MP_STATE_CHECK_STOP:
2763 /* fall through - CHECK_STOP and LOAD are not supported yet */
2764 default:
2765 rc = -ENXIO;
2766 }
2767
2768 return rc;
2769 }
2770
2771 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2772 {
2773 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2774 }
2775
2776 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2777 {
2778 retry:
2779 kvm_s390_vcpu_request_handled(vcpu);
2780 if (!kvm_request_pending(vcpu))
2781 return 0;
2782 /*
2783 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2784 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2785 * This ensures that the ipte instruction for this request has
2786 * already finished. We might race against a second unmapper that
2787 * wants to set the blocking bit. Let's just retry the request loop.
2788 */
2789 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2790 int rc;
2791 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2792 kvm_s390_get_prefix(vcpu),
2793 PAGE_SIZE * 2, PROT_WRITE);
2794 if (rc) {
2795 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2796 return rc;
2797 }
2798 goto retry;
2799 }
2800
2801 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2802 vcpu->arch.sie_block->ihcpu = 0xffff;
2803 goto retry;
2804 }
2805
2806 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2807 if (!ibs_enabled(vcpu)) {
2808 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2809 atomic_or(CPUSTAT_IBS,
2810 &vcpu->arch.sie_block->cpuflags);
2811 }
2812 goto retry;
2813 }
2814
2815 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2816 if (ibs_enabled(vcpu)) {
2817 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2818 atomic_andnot(CPUSTAT_IBS,
2819 &vcpu->arch.sie_block->cpuflags);
2820 }
2821 goto retry;
2822 }
2823
2824 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2825 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2826 goto retry;
2827 }
2828
2829 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2830 /*
2831 * Disable CMMA virtualization; we will emulate the ESSA
2832 * instruction manually, in order to provide additional
2833 * functionalities needed for live migration.
2834 */
2835 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2836 goto retry;
2837 }
2838
2839 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2840 /*
2841 * Re-enable CMMA virtualization if CMMA is available and
2842 * was used.
2843 */
2844 if ((vcpu->kvm->arch.use_cmma) &&
2845 (vcpu->kvm->mm->context.use_cmma))
2846 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2847 goto retry;
2848 }
2849
2850 /* nothing to do, just clear the request */
2851 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2852
2853 return 0;
2854 }
2855
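/*
 * Setting the guest TOD clock only stores the difference between the
 * requested guest time and the host TOD in arch.epoch; every SIE block
 * receives the same epoch while all vCPUs are briefly blocked, so the
 * guest's view of the clock stays consistent across vCPUs.
 */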
2856 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2857 {
2858 struct kvm_vcpu *vcpu;
2859 int i;
2860
2861 mutex_lock(&kvm->lock);
2862 preempt_disable();
2863 kvm->arch.epoch = tod - get_tod_clock();
2864 kvm_s390_vcpu_block_all(kvm);
2865 kvm_for_each_vcpu(i, vcpu, kvm)
2866 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2867 kvm_s390_vcpu_unblock_all(kvm);
2868 preempt_enable();
2869 mutex_unlock(&kvm->lock);
2870 }
2871
2872 /**
2873 * kvm_arch_fault_in_page - fault-in guest page if necessary
2874 * @vcpu: The corresponding virtual cpu
2875 * @gpa: Guest physical address
2876 * @writable: Whether the page should be writable or not
2877 *
2878 * Make sure that a guest page has been faulted-in on the host.
2879 *
2880 * Return: Zero on success, negative error code otherwise.
2881 */
2882 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2883 {
2884 return gmap_fault(vcpu->arch.gmap, gpa,
2885 writable ? FAULT_FLAG_WRITE : 0);
2886 }
2887
2888 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2889 unsigned long token)
2890 {
2891 struct kvm_s390_interrupt inti;
2892 struct kvm_s390_irq irq;
2893
2894 if (start_token) {
2895 irq.u.ext.ext_params2 = token;
2896 irq.type = KVM_S390_INT_PFAULT_INIT;
2897 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2898 } else {
2899 inti.type = KVM_S390_INT_PFAULT_DONE;
2900 inti.parm64 = token;
2901 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2902 }
2903 }
2904
2905 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2906 struct kvm_async_pf *work)
2907 {
2908 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2909 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2910 }
2911
2912 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2913 struct kvm_async_pf *work)
2914 {
2915 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2916 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2917 }
2918
2919 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2920 struct kvm_async_pf *work)
2921 {
2922 /* s390 will always inject the page directly */
2923 }
2924
2925 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2926 {
2927 /*
2928 * s390 will always inject the page directly,
2929 * but we still want check_async_completion to clean up
2930 */
2931 return true;
2932 }
2933
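/*
 * An async pfault is only set up if the guest armed the pfault facility
 * (a valid token is set), the PSW matches the guest's compare/select
 * mask, external interrupts and the relevant CR0 subclass are enabled,
 * no other interrupt is pending and the gmap allows pfault; otherwise
 * the fault is resolved synchronously in vcpu_post_run().
 */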
2934 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2935 {
2936 hva_t hva;
2937 struct kvm_arch_async_pf arch;
2938 int rc;
2939
2940 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2941 return 0;
2942 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2943 vcpu->arch.pfault_compare)
2944 return 0;
2945 if (psw_extint_disabled(vcpu))
2946 return 0;
2947 if (kvm_s390_vcpu_has_irq(vcpu, 0))
2948 return 0;
2949 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2950 return 0;
2951 if (!vcpu->arch.gmap->pfault_enabled)
2952 return 0;
2953
2954 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2955 hva += current->thread.gmap_addr & ~PAGE_MASK;
2956 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2957 return 0;
2958
2959 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2960 return rc;
2961 }
2962
2963 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2964 {
2965 int rc, cpuflags;
2966
2967 /*
2968 * On s390 notifications for arriving pages will be delivered directly
2969 * to the guest but the housekeeping for completed pfaults is
2970 * handled outside the worker.
2971 */
2972 kvm_check_async_pf_completion(vcpu);
2973
2974 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2975 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2976
2977 if (need_resched())
2978 schedule();
2979
2980 if (test_cpu_flag(CIF_MCCK_PENDING))
2981 s390_handle_mcck();
2982
2983 if (!kvm_is_ucontrol(vcpu->kvm)) {
2984 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2985 if (rc)
2986 return rc;
2987 }
2988
2989 rc = kvm_s390_handle_requests(vcpu);
2990 if (rc)
2991 return rc;
2992
2993 if (guestdbg_enabled(vcpu)) {
2994 kvm_s390_backup_guest_per_regs(vcpu);
2995 kvm_s390_patch_guest_per_regs(vcpu);
2996 }
2997
2998 vcpu->arch.sie_block->icptcode = 0;
2999 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3000 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3001 trace_kvm_s390_sie_enter(vcpu, cpuflags);
3002
3003 return 0;
3004 }
3005
3006 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3007 {
3008 struct kvm_s390_pgm_info pgm_info = {
3009 .code = PGM_ADDRESSING,
3010 };
3011 u8 opcode, ilen;
3012 int rc;
3013
3014 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3015 trace_kvm_s390_sie_fault(vcpu);
3016
3017 /*
3018 * We want to inject an addressing exception, which is defined as a
3019 * suppressing or terminating exception. However, since we came here
3020 * by a DAT access exception, the PSW still points to the faulting
3021 * instruction since DAT exceptions are nullifying. So we've got
3022 * to look up the current opcode to get the length of the instruction
3023 * to be able to forward the PSW.
3024 */
3025 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3026 ilen = insn_length(opcode);
3027 if (rc < 0) {
3028 return rc;
3029 } else if (rc) {
3030 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3031 * Forward by arbitrary ilc, injection will take care of
3032 * nullification if necessary.
3033 */
3034 pgm_info = vcpu->arch.pgm;
3035 ilen = 4;
3036 }
3037 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3038 kvm_s390_forward_psw(vcpu, ilen);
3039 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3040 }
3041
3042 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3043 {
3044 struct mcck_volatile_info *mcck_info;
3045 struct sie_page *sie_page;
3046
3047 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3048 vcpu->arch.sie_block->icptcode);
3049 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3050
3051 if (guestdbg_enabled(vcpu))
3052 kvm_s390_restore_guest_per_regs(vcpu);
3053
3054 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3055 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3056
3057 if (exit_reason == -EINTR) {
3058 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3059 sie_page = container_of(vcpu->arch.sie_block,
3060 struct sie_page, sie_block);
3061 mcck_info = &sie_page->mcck_info;
3062 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3063 return 0;
3064 }
3065
3066 if (vcpu->arch.sie_block->icptcode > 0) {
3067 int rc = kvm_handle_sie_intercept(vcpu);
3068
3069 if (rc != -EOPNOTSUPP)
3070 return rc;
3071 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3072 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3073 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3074 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3075 return -EREMOTE;
3076 } else if (exit_reason != -EFAULT) {
3077 vcpu->stat.exit_null++;
3078 return 0;
3079 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3080 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3081 vcpu->run->s390_ucontrol.trans_exc_code =
3082 current->thread.gmap_addr;
3083 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3084 return -EREMOTE;
3085 } else if (current->thread.gmap_pfault) {
3086 trace_kvm_s390_major_guest_pfault(vcpu);
3087 current->thread.gmap_pfault = 0;
3088 if (kvm_arch_setup_async_pf(vcpu))
3089 return 0;
3090 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3091 }
3092 return vcpu_post_run_fault_in_sie(vcpu);
3093 }
3094
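/*
 * Note for the loop below: kvm->srcu is dropped while the guest runs so
 * that memslot updates are not blocked for that long, and the software
 * CPU timer accounting is paused around sie64a() since the guest CPU
 * timer is maintained by SIE itself during that time.
 */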
3095 static int __vcpu_run(struct kvm_vcpu *vcpu)
3096 {
3097 int rc, exit_reason;
3098
3099 /*
3100 * We try to hold kvm->srcu during most of vcpu_run (except when
3101 * running the guest), so that memslots (and other stuff) are protected
3102 */
3103 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3104
3105 do {
3106 rc = vcpu_pre_run(vcpu);
3107 if (rc)
3108 break;
3109
3110 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3111 /*
3112 * As PF_VCPU will be used in the fault handler, there must be
3113 * no uaccess between guest_enter and guest_exit.
3114 */
3115 local_irq_disable();
3116 guest_enter_irqoff();
3117 __disable_cpu_timer_accounting(vcpu);
3118 local_irq_enable();
3119 exit_reason = sie64a(vcpu->arch.sie_block,
3120 vcpu->run->s.regs.gprs);
3121 local_irq_disable();
3122 __enable_cpu_timer_accounting(vcpu);
3123 guest_exit_irqoff();
3124 local_irq_enable();
3125 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3126
3127 rc = vcpu_post_run(vcpu, exit_reason);
3128 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3129
3130 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3131 return rc;
3132 }
3133
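/*
 * sync_regs() transfers the register state that userspace marked dirty
 * in kvm_run into the SIE block and the host thread before entering the
 * guest; store_regs() below is its counterpart on the way back to
 * userspace.
 */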
3134 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3135 {
3136 struct runtime_instr_cb *riccb;
3137 struct gs_cb *gscb;
3138
3139 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3140 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3141 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3142 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3143 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3144 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3145 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3146 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3147 /* some control register changes require a tlb flush */
3148 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3149 }
3150 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3151 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3152 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3153 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3154 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3155 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3156 }
3157 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3158 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3159 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3160 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3161 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3162 kvm_clear_async_pf_completion_queue(vcpu);
3163 }
3164 /*
3165 * If userspace sets the riccb (e.g. after migration) to a valid state,
3166 * we should enable RI here instead of doing the lazy enablement.
3167 */
3168 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3169 test_kvm_facility(vcpu->kvm, 64) &&
3170 riccb->valid &&
3171 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3172 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3173 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3174 }
3175 /*
3176 * If userspace sets the gscb (e.g. after migration) to non-zero,
3177 * we should enable GS here instead of doing the lazy enablement.
3178 */
3179 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3180 test_kvm_facility(vcpu->kvm, 133) &&
3181 gscb->gssm &&
3182 !vcpu->arch.gs_enabled) {
3183 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3184 vcpu->arch.sie_block->ecb |= ECB_GS;
3185 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3186 vcpu->arch.gs_enabled = 1;
3187 }
3188 save_access_regs(vcpu->arch.host_acrs);
3189 restore_access_regs(vcpu->run->s.regs.acrs);
3190 /* save host (userspace) fprs/vrs */
3191 save_fpu_regs();
3192 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3193 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3194 if (MACHINE_HAS_VX)
3195 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3196 else
3197 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3198 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3199 if (test_fp_ctl(current->thread.fpu.fpc))
3200 /* User space provided an invalid FPC, let's clear it */
3201 current->thread.fpu.fpc = 0;
3202 if (MACHINE_HAS_GS) {
3203 preempt_disable();
3204 __ctl_set_bit(2, 4);
3205 if (current->thread.gs_cb) {
3206 vcpu->arch.host_gscb = current->thread.gs_cb;
3207 save_gs_cb(vcpu->arch.host_gscb);
3208 }
3209 if (vcpu->arch.gs_enabled) {
3210 current->thread.gs_cb = (struct gs_cb *)
3211 &vcpu->run->s.regs.gscb;
3212 restore_gs_cb(current->thread.gs_cb);
3213 }
3214 preempt_enable();
3215 }
3216
3217 kvm_run->kvm_dirty_regs = 0;
3218 }
3219
3220 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3221 {
3222 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3223 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3224 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3225 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3226 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3227 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3228 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3229 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3230 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3231 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3232 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3233 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3234 save_access_regs(vcpu->run->s.regs.acrs);
3235 restore_access_regs(vcpu->arch.host_acrs);
3236 /* Save guest register state */
3237 save_fpu_regs();
3238 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3239 /* Restore will be done lazily at return */
3240 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3241 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3242 if (MACHINE_HAS_GS) {
3243 __ctl_set_bit(2, 4);
3244 if (vcpu->arch.gs_enabled)
3245 save_gs_cb(current->thread.gs_cb);
3246 preempt_disable();
3247 current->thread.gs_cb = vcpu->arch.host_gscb;
3248 restore_gs_cb(vcpu->arch.host_gscb);
3249 preempt_enable();
3250 if (!vcpu->arch.host_gscb)
3251 __ctl_clear_bit(2, 4);
3252 vcpu->arch.host_gscb = NULL;
3253 }
3254
3255 }
3256
3257 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3258 {
3259 int rc;
3260 sigset_t sigsaved;
3261
3262 if (kvm_run->immediate_exit)
3263 return -EINTR;
3264
3265 if (guestdbg_exit_pending(vcpu)) {
3266 kvm_s390_prepare_debug_exit(vcpu);
3267 return 0;
3268 }
3269
3270 if (vcpu->sigset_active)
3271 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
3272
3273 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3274 kvm_s390_vcpu_start(vcpu);
3275 } else if (is_vcpu_stopped(vcpu)) {
3276 pr_err_ratelimited("can't run stopped vcpu %d\n",
3277 vcpu->vcpu_id);
3278 return -EINVAL;
3279 }
3280
3281 sync_regs(vcpu, kvm_run);
3282 enable_cpu_timer_accounting(vcpu);
3283
3284 might_fault();
3285 rc = __vcpu_run(vcpu);
3286
3287 if (signal_pending(current) && !rc) {
3288 kvm_run->exit_reason = KVM_EXIT_INTR;
3289 rc = -EINTR;
3290 }
3291
3292 if (guestdbg_exit_pending(vcpu) && !rc) {
3293 kvm_s390_prepare_debug_exit(vcpu);
3294 rc = 0;
3295 }
3296
3297 if (rc == -EREMOTE) {
3298 /* userspace support is needed, kvm_run has been prepared */
3299 rc = 0;
3300 }
3301
3302 disable_cpu_timer_accounting(vcpu);
3303 store_regs(vcpu, kvm_run);
3304
3305 if (vcpu->sigset_active)
3306 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
3307
3308 vcpu->stat.exit_userspace++;
3309 return rc;
3310 }
3311
3312 /*
3313 * store status at address
3314 * we have two special cases:
3315 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3316 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3317 */
3318 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3319 {
3320 unsigned char archmode = 1;
3321 freg_t fprs[NUM_FPRS];
3322 unsigned int px;
3323 u64 clkcomp, cputm;
3324 int rc;
3325
3326 px = kvm_s390_get_prefix(vcpu);
3327 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3328 if (write_guest_abs(vcpu, 163, &archmode, 1))
3329 return -EFAULT;
3330 gpa = 0;
3331 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3332 if (write_guest_real(vcpu, 163, &archmode, 1))
3333 return -EFAULT;
3334 gpa = px;
3335 } else
3336 gpa -= __LC_FPREGS_SAVE_AREA;
3337
3338 /* manually convert vector registers if necessary */
3339 if (MACHINE_HAS_VX) {
3340 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3341 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3342 fprs, 128);
3343 } else {
3344 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3345 vcpu->run->s.regs.fprs, 128);
3346 }
3347 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3348 vcpu->run->s.regs.gprs, 128);
3349 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3350 &vcpu->arch.sie_block->gpsw, 16);
3351 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3352 &px, 4);
3353 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3354 &vcpu->run->s.regs.fpc, 4);
3355 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3356 &vcpu->arch.sie_block->todpr, 4);
3357 cputm = kvm_s390_get_cpu_timer(vcpu);
3358 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3359 &cputm, 8);
3360 clkcomp = vcpu->arch.sie_block->ckc >> 8;
3361 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3362 &clkcomp, 8);
3363 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3364 &vcpu->run->s.regs.acrs, 64);
3365 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3366 &vcpu->arch.sie_block->gcr, 128);
3367 return rc ? -EFAULT : 0;
3368 }
3369
3370 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3371 {
3372 /*
3373 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3374 * switch in the run ioctl. Let's update our copies before we save
3375 * them into the save area.
3376 */
3377 save_fpu_regs();
3378 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3379 save_access_regs(vcpu->run->s.regs.acrs);
3380
3381 return kvm_s390_store_status_unloaded(vcpu, addr);
3382 }
3383
3384 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3385 {
3386 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3387 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3388 }
3389
3390 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3391 {
3392 unsigned int i;
3393 struct kvm_vcpu *vcpu;
3394
3395 kvm_for_each_vcpu(i, vcpu, kvm) {
3396 __disable_ibs_on_vcpu(vcpu);
3397 }
3398 }
3399
3400 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3401 {
3402 if (!sclp.has_ibs)
3403 return;
3404 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3405 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3406 }
3407
3408 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3409 {
3410 int i, online_vcpus, started_vcpus = 0;
3411
3412 if (!is_vcpu_stopped(vcpu))
3413 return;
3414
3415 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3416 /* Only one cpu at a time may enter/leave the STOPPED state. */
3417 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3418 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3419
3420 for (i = 0; i < online_vcpus; i++) {
3421 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3422 started_vcpus++;
3423 }
3424
3425 if (started_vcpus == 0) {
3426 /* we're the only active VCPU -> speed it up */
3427 __enable_ibs_on_vcpu(vcpu);
3428 } else if (started_vcpus == 1) {
3429 /*
3430 * As we are starting a second VCPU, we have to disable
3431 * the IBS facility on all VCPUs to remove potentially
3432 * outstanding ENABLE requests.
3433 */
3434 __disable_ibs_on_all_vcpus(vcpu->kvm);
3435 }
3436
3437 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3438 /*
3439 * Another VCPU might have used IBS while we were offline.
3440 * Let's play safe and flush the VCPU at startup.
3441 */
3442 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3443 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3444 return;
3445 }
3446
3447 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3448 {
3449 int i, online_vcpus, started_vcpus = 0;
3450 struct kvm_vcpu *started_vcpu = NULL;
3451
3452 if (is_vcpu_stopped(vcpu))
3453 return;
3454
3455 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3456 /* Only one cpu at a time may enter/leave the STOPPED state. */
3457 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3458 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3459
3460 /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3461 kvm_s390_clear_stop_irq(vcpu);
3462
3463 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3464 __disable_ibs_on_vcpu(vcpu);
3465
3466 for (i = 0; i < online_vcpus; i++) {
3467 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3468 started_vcpus++;
3469 started_vcpu = vcpu->kvm->vcpus[i];
3470 }
3471 }
3472
3473 if (started_vcpus == 1) {
3474 /*
3475 * As we only have one VCPU left, we want to enable the
3476 * IBS facility for that VCPU to speed it up.
3477 */
3478 __enable_ibs_on_vcpu(started_vcpu);
3479 }
3480
3481 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3482 return;
3483 }
3484
3485 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3486 struct kvm_enable_cap *cap)
3487 {
3488 int r;
3489
3490 if (cap->flags)
3491 return -EINVAL;
3492
3493 switch (cap->cap) {
3494 case KVM_CAP_S390_CSS_SUPPORT:
3495 if (!vcpu->kvm->arch.css_support) {
3496 vcpu->kvm->arch.css_support = 1;
3497 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3498 trace_kvm_s390_enable_css(vcpu->kvm);
3499 }
3500 r = 0;
3501 break;
3502 default:
3503 r = -EINVAL;
3504 break;
3505 }
3506 return r;
3507 }
3508
3509 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3510 struct kvm_s390_mem_op *mop)
3511 {
3512 void __user *uaddr = (void __user *)mop->buf;
3513 void *tmpbuf = NULL;
3514 int r, srcu_idx;
3515 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3516 | KVM_S390_MEMOP_F_CHECK_ONLY;
3517
3518 if (mop->flags & ~supported_flags)
3519 return -EINVAL;
3520
3521 if (mop->size > MEM_OP_MAX_SIZE)
3522 return -E2BIG;
3523
3524 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3525 tmpbuf = vmalloc(mop->size);
3526 if (!tmpbuf)
3527 return -ENOMEM;
3528 }
3529
3530 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3531
3532 switch (mop->op) {
3533 case KVM_S390_MEMOP_LOGICAL_READ:
3534 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3535 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3536 mop->size, GACC_FETCH);
3537 break;
3538 }
3539 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3540 if (r == 0) {
3541 if (copy_to_user(uaddr, tmpbuf, mop->size))
3542 r = -EFAULT;
3543 }
3544 break;
3545 case KVM_S390_MEMOP_LOGICAL_WRITE:
3546 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3547 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3548 mop->size, GACC_STORE);
3549 break;
3550 }
3551 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3552 r = -EFAULT;
3553 break;
3554 }
3555 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3556 break;
3557 default:
3558 r = -EINVAL;
3559 }
3560
3561 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3562
3563 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3564 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3565
3566 vfree(tmpbuf);
3567 return r;
3568 }
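/*
 * Illustrative sketch (not part of this file): a logical read of guest
 * memory from userspace via the handler above, roughly like this, where
 * guest_addr and vcpu_fd are placeholders for the caller's values:
 *
 *	__u8 buf[256];
 *	struct kvm_s390_mem_op mop = {
 *		.gaddr = guest_addr,
 *		.size  = sizeof(buf),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)buf,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop);
 */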
3569
3570 long kvm_arch_vcpu_ioctl(struct file *filp,
3571 unsigned int ioctl, unsigned long arg)
3572 {
3573 struct kvm_vcpu *vcpu = filp->private_data;
3574 void __user *argp = (void __user *)arg;
3575 int idx;
3576 long r;
3577
3578 switch (ioctl) {
3579 case KVM_S390_IRQ: {
3580 struct kvm_s390_irq s390irq;
3581
3582 r = -EFAULT;
3583 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3584 break;
3585 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3586 break;
3587 }
3588 case KVM_S390_INTERRUPT: {
3589 struct kvm_s390_interrupt s390int;
3590 struct kvm_s390_irq s390irq;
3591
3592 r = -EFAULT;
3593 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3594 break;
3595 if (s390int_to_s390irq(&s390int, &s390irq))
3596 return -EINVAL;
3597 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3598 break;
3599 }
3600 case KVM_S390_STORE_STATUS:
3601 idx = srcu_read_lock(&vcpu->kvm->srcu);
3602 r = kvm_s390_vcpu_store_status(vcpu, arg);
3603 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3604 break;
3605 case KVM_S390_SET_INITIAL_PSW: {
3606 psw_t psw;
3607
3608 r = -EFAULT;
3609 if (copy_from_user(&psw, argp, sizeof(psw)))
3610 break;
3611 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3612 break;
3613 }
3614 case KVM_S390_INITIAL_RESET:
3615 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3616 break;
3617 case KVM_SET_ONE_REG:
3618 case KVM_GET_ONE_REG: {
3619 struct kvm_one_reg reg;
3620 r = -EFAULT;
3621 if (copy_from_user(&reg, argp, sizeof(reg)))
3622 break;
3623 if (ioctl == KVM_SET_ONE_REG)
3624 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3625 else
3626 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3627 break;
3628 }
3629 #ifdef CONFIG_KVM_S390_UCONTROL
3630 case KVM_S390_UCAS_MAP: {
3631 struct kvm_s390_ucas_mapping ucasmap;
3632
3633 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3634 r = -EFAULT;
3635 break;
3636 }
3637
3638 if (!kvm_is_ucontrol(vcpu->kvm)) {
3639 r = -EINVAL;
3640 break;
3641 }
3642
3643 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3644 ucasmap.vcpu_addr, ucasmap.length);
3645 break;
3646 }
3647 case KVM_S390_UCAS_UNMAP: {
3648 struct kvm_s390_ucas_mapping ucasmap;
3649
3650 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3651 r = -EFAULT;
3652 break;
3653 }
3654
3655 if (!kvm_is_ucontrol(vcpu->kvm)) {
3656 r = -EINVAL;
3657 break;
3658 }
3659
3660 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3661 ucasmap.length);
3662 break;
3663 }
3664 #endif
3665 case KVM_S390_VCPU_FAULT: {
3666 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3667 break;
3668 }
3669 case KVM_ENABLE_CAP:
3670 {
3671 struct kvm_enable_cap cap;
3672 r = -EFAULT;
3673 if (copy_from_user(&cap, argp, sizeof(cap)))
3674 break;
3675 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3676 break;
3677 }
3678 case KVM_S390_MEM_OP: {
3679 struct kvm_s390_mem_op mem_op;
3680
3681 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3682 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3683 else
3684 r = -EFAULT;
3685 break;
3686 }
3687 case KVM_S390_SET_IRQ_STATE: {
3688 struct kvm_s390_irq_state irq_state;
3689
3690 r = -EFAULT;
3691 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3692 break;
3693 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3694 irq_state.len == 0 ||
3695 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3696 r = -EINVAL;
3697 break;
3698 }
3699 r = kvm_s390_set_irq_state(vcpu,
3700 (void __user *) irq_state.buf,
3701 irq_state.len);
3702 break;
3703 }
3704 case KVM_S390_GET_IRQ_STATE: {
3705 struct kvm_s390_irq_state irq_state;
3706
3707 r = -EFAULT;
3708 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3709 break;
3710 if (irq_state.len == 0) {
3711 r = -EINVAL;
3712 break;
3713 }
3714 r = kvm_s390_get_irq_state(vcpu,
3715 (__u8 __user *) irq_state.buf,
3716 irq_state.len);
3717 break;
3718 }
3719 default:
3720 r = -ENOTTY;
3721 }
3722 return r;
3723 }
3724
3725 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3726 {
3727 #ifdef CONFIG_KVM_S390_UCONTROL
3728 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3729 && (kvm_is_ucontrol(vcpu->kvm))) {
3730 vmf->page = virt_to_page(vcpu->arch.sie_block);
3731 get_page(vmf->page);
3732 return 0;
3733 }
3734 #endif
3735 return VM_FAULT_SIGBUS;
3736 }
3737
3738 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3739 unsigned long npages)
3740 {
3741 return 0;
3742 }
3743
3744 /* Section: memory related */
3745 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3746 struct kvm_memory_slot *memslot,
3747 const struct kvm_userspace_memory_region *mem,
3748 enum kvm_mr_change change)
3749 {
3750 /* A few sanity checks. Memory slots have to start and end at a
3751 segment boundary (1 MB). The memory in userland may be fragmented
3752 into various different vmas. It is okay to mmap() and munmap()
3753 parts of this slot at any time after this call. */
3754
3755 if (mem->userspace_addr & 0xffffful)
3756 return -EINVAL;
3757
3758 if (mem->memory_size & 0xffffful)
3759 return -EINVAL;
3760
3761 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3762 return -EINVAL;
3763
3764 return 0;
3765 }
3766
3767 void kvm_arch_commit_memory_region(struct kvm *kvm,
3768 const struct kvm_userspace_memory_region *mem,
3769 const struct kvm_memory_slot *old,
3770 const struct kvm_memory_slot *new,
3771 enum kvm_mr_change change)
3772 {
3773 int rc;
3774
3775 /* If the basics of the memslot do not change, we do not want
3776 * to update the gmap. Every update causes several unnecessary
3777 * segment translation exceptions. This is usually handled just
3778 * fine by the normal fault handler + gmap, but it will also
3779 * cause faults on the prefix page of running guest CPUs.
3780 */
3781 if (old->userspace_addr == mem->userspace_addr &&
3782 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3783 old->npages * PAGE_SIZE == mem->memory_size)
3784 return;
3785
3786 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3787 mem->guest_phys_addr, mem->memory_size);
3788 if (rc)
3789 pr_warn("failed to commit memory region\n");
3790 return;
3791 }
3792
3793 static inline unsigned long nonhyp_mask(int i)
3794 {
3795 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3796
3797 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3798 }
3799
3800 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3801 {
3802 vcpu->valid_wakeup = false;
3803 }
3804
3805 static int __init kvm_s390_init(void)
3806 {
3807 int i;
3808
3809 if (!sclp.has_sief2) {
3810 pr_info("SIE not available\n");
3811 return -ENODEV;
3812 }
3813
3814 for (i = 0; i < 16; i++)
3815 kvm_s390_fac_list_mask[i] |=
3816 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3817
3818 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3819 }
3820
3821 static void __exit kvm_s390_exit(void)
3822 {
3823 kvm_exit();
3824 }
3825
3826 module_init(kvm_s390_init);
3827 module_exit(kvm_s390_exit);
3828
3829 /*
3830 * Enable autoloading of the kvm module.
3831 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3832 * since x86 takes a different approach.
3833 */
3834 #include <linux/miscdevice.h>
3835 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3836 MODULE_ALIAS("devname:kvm");