arch/s390/kvm/kvm-s390.c
1 /*
2 * hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008, 2009
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
15 */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include "kvm-s390.h"
47 #include "gaccess.h"
48
49 #define KMSG_COMPONENT "kvm-s390"
50 #undef pr_fmt
51 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
52
53 #define CREATE_TRACE_POINTS
54 #include "trace.h"
55 #include "trace-s390.h"
56
57 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
58 #define LOCAL_IRQS 32
59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
60 (KVM_MAX_VCPUS + LOCAL_IRQS))
61
62 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
63
64 struct kvm_stats_debugfs_item debugfs_entries[] = {
65 { "userspace_handled", VCPU_STAT(exit_userspace) },
66 { "exit_null", VCPU_STAT(exit_null) },
67 { "exit_validity", VCPU_STAT(exit_validity) },
68 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
69 { "exit_external_request", VCPU_STAT(exit_external_request) },
70 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
71 { "exit_instruction", VCPU_STAT(exit_instruction) },
72 { "exit_pei", VCPU_STAT(exit_pei) },
73 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
74 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
75 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
76 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
77 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
78 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
79 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
80 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
82 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
83 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
84 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
85 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
86 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
87 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
88 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
89 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
90 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
91 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
92 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
93 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
94 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
95 { "instruction_spx", VCPU_STAT(instruction_spx) },
96 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
97 { "instruction_stap", VCPU_STAT(instruction_stap) },
98 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
99 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
100 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
101 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
102 { "instruction_essa", VCPU_STAT(instruction_essa) },
103 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
104 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
105 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
106 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
107 { "instruction_sie", VCPU_STAT(instruction_sie) },
108 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
109 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
110 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
111 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
112 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
113 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
114 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
115 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
116 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
117 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
118 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
119 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
120 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
121 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
122 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
123 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
124 { "diagnose_10", VCPU_STAT(diagnose_10) },
125 { "diagnose_44", VCPU_STAT(diagnose_44) },
126 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
127 { "diagnose_258", VCPU_STAT(diagnose_258) },
128 { "diagnose_308", VCPU_STAT(diagnose_308) },
129 { "diagnose_500", VCPU_STAT(diagnose_500) },
130 { NULL }
131 };
132
133 /* allow nested virtualization in KVM (if enabled by user space) */
134 static int nested;
135 module_param(nested, int, S_IRUGO);
136 MODULE_PARM_DESC(nested, "Nested virtualization support");
137
138 /* upper facilities limit for kvm */
139 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
140
141 unsigned long kvm_s390_fac_list_mask_size(void)
142 {
143 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
144 return ARRAY_SIZE(kvm_s390_fac_list_mask);
145 }
146
147 /* available cpu features supported by kvm */
148 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
149 /* available subfunctions indicated via query / "test bit" */
150 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
151
152 static struct gmap_notifier gmap_notifier;
153 static struct gmap_notifier vsie_gmap_notifier;
154 debug_info_t *kvm_s390_dbf;
155
156 /* Section: not file related */
157 int kvm_arch_hardware_enable(void)
158 {
159 /* every s390 is virtualization enabled ;-) */
160 return 0;
161 }
162
163 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
164 unsigned long end);
165
166 /*
167 * This callback is executed during stop_machine(). All CPUs are therefore
168 * temporarily stopped. In order not to change guest behavior, we have to
169 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
170 * so a CPU won't be stopped while calculating with the epoch.
171 */
172 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
173 void *v)
174 {
175 struct kvm *kvm;
176 struct kvm_vcpu *vcpu;
177 int i;
178 unsigned long long *delta = v;
179
180 list_for_each_entry(kvm, &vm_list, vm_list) {
181 kvm->arch.epoch -= *delta;
182 kvm_for_each_vcpu(i, vcpu, kvm) {
183 vcpu->arch.sie_block->epoch -= *delta;
184 if (vcpu->arch.cputm_enabled)
185 vcpu->arch.cputm_start += *delta;
186 if (vcpu->arch.vsie_block)
187 vcpu->arch.vsie_block->epoch -= *delta;
188 }
189 }
190 return NOTIFY_OK;
191 }
192
193 static struct notifier_block kvm_clock_notifier = {
194 .notifier_call = kvm_clock_sync,
195 };
196
197 int kvm_arch_hardware_setup(void)
198 {
199 gmap_notifier.notifier_call = kvm_gmap_notifier;
200 gmap_register_pte_notifier(&gmap_notifier);
201 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
202 gmap_register_pte_notifier(&vsie_gmap_notifier);
203 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
204 &kvm_clock_notifier);
205 return 0;
206 }
207
208 void kvm_arch_hardware_unsetup(void)
209 {
210 gmap_unregister_pte_notifier(&gmap_notifier);
211 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
212 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
213 &kvm_clock_notifier);
214 }
215
216 static void allow_cpu_feat(unsigned long nr)
217 {
218 set_bit_inv(nr, kvm_s390_available_cpu_feat);
219 }
220
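/*
 * Editorial note (not in the original source): test whether a single PLO
 * (PERFORM LOCKED OPERATION) function code is installed.  Bit 0x100 in
 * general register 0 selects the "test bit" form, so only the condition
 * code is set (cc == 0 means the function is available) and no locked
 * operation is actually performed.
 */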
221 static inline int plo_test_bit(unsigned char nr)
222 {
223 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
224 int cc;
225
226 asm volatile(
227 /* Parameter registers are ignored for "test bit" */
228 " plo 0,0,0,0(0)\n"
229 " ipm %0\n"
230 " srl %0,28\n"
231 : "=d" (cc)
232 : "d" (r0)
233 : "cc");
234 return cc == 0;
235 }
236
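/*
 * Editorial note (not in the original source): probe the host for the
 * subfunctions (PLO, PTFF and the CPACF crypto queries) and the
 * SCLP-reported SIE features that KVM can offer to guests, recording the
 * results in kvm_s390_available_subfunc and kvm_s390_available_cpu_feat.
 * The SIE features needed for nested (vSIE) support are only advertised
 * when the "nested" module parameter is set.
 */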
237 static void kvm_s390_cpu_feat_init(void)
238 {
239 int i;
240
241 for (i = 0; i < 256; ++i) {
242 if (plo_test_bit(i))
243 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
244 }
245
246 if (test_facility(28)) /* TOD-clock steering */
247 ptff(kvm_s390_available_subfunc.ptff,
248 sizeof(kvm_s390_available_subfunc.ptff),
249 PTFF_QAF);
250
251 if (test_facility(17)) { /* MSA */
252 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
253 kvm_s390_available_subfunc.kmac);
254 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
255 kvm_s390_available_subfunc.kmc);
256 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
257 kvm_s390_available_subfunc.km);
258 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
259 kvm_s390_available_subfunc.kimd);
260 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
261 kvm_s390_available_subfunc.klmd);
262 }
263 if (test_facility(76)) /* MSA3 */
264 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
265 kvm_s390_available_subfunc.pckmo);
266 if (test_facility(77)) { /* MSA4 */
267 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
268 kvm_s390_available_subfunc.kmctr);
269 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
270 kvm_s390_available_subfunc.kmf);
271 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
272 kvm_s390_available_subfunc.kmo);
273 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
274 kvm_s390_available_subfunc.pcc);
275 }
276 if (test_facility(57)) /* MSA5 */
277 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
278 kvm_s390_available_subfunc.ppno);
279
280 if (test_facility(146)) /* MSA8 */
281 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
282 kvm_s390_available_subfunc.kma);
283
284 if (MACHINE_HAS_ESOP)
285 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
286 /*
287 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
288 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
289 */
290 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
291 !test_facility(3) || !nested)
292 return;
293 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
294 if (sclp.has_64bscao)
295 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
296 if (sclp.has_siif)
297 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
298 if (sclp.has_gpere)
299 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
300 if (sclp.has_gsls)
301 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
302 if (sclp.has_ib)
303 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
304 if (sclp.has_cei)
305 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
306 if (sclp.has_ibs)
307 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
308 if (sclp.has_kss)
309 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
310 /*
311 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
312 * all skey handling functions read/set the skey from the PGSTE
313 * instead of the real storage key.
314 *
315 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
316 * pages being detected as preserved although they are resident.
317 *
318 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
319 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
320 *
321 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
322 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
323 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
324 *
325 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
326 * cannot easily shadow the SCA because of the ipte lock.
327 */
328 }
329
330 int kvm_arch_init(void *opaque)
331 {
332 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
333 if (!kvm_s390_dbf)
334 return -ENOMEM;
335
336 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
337 debug_unregister(kvm_s390_dbf);
338 return -ENOMEM;
339 }
340
341 kvm_s390_cpu_feat_init();
342
343 /* Register floating interrupt controller interface. */
344 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
345 }
346
347 void kvm_arch_exit(void)
348 {
349 debug_unregister(kvm_s390_dbf);
350 }
351
352 /* Section: device related */
353 long kvm_arch_dev_ioctl(struct file *filp,
354 unsigned int ioctl, unsigned long arg)
355 {
356 if (ioctl == KVM_S390_ENABLE_SIE)
357 return s390_enable_sie();
358 return -EINVAL;
359 }
360
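/*
 * Editorial note (not in the original source): report which optional
 * capabilities are available.  Boolean capabilities return 0 or 1;
 * others (e.g. KVM_CAP_S390_MEM_OP or the vcpu/memslot limits) return
 * the supported value directly.
 */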
361 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
362 {
363 int r;
364
365 switch (ext) {
366 case KVM_CAP_S390_PSW:
367 case KVM_CAP_S390_GMAP:
368 case KVM_CAP_SYNC_MMU:
369 #ifdef CONFIG_KVM_S390_UCONTROL
370 case KVM_CAP_S390_UCONTROL:
371 #endif
372 case KVM_CAP_ASYNC_PF:
373 case KVM_CAP_SYNC_REGS:
374 case KVM_CAP_ONE_REG:
375 case KVM_CAP_ENABLE_CAP:
376 case KVM_CAP_S390_CSS_SUPPORT:
377 case KVM_CAP_IOEVENTFD:
378 case KVM_CAP_DEVICE_CTRL:
379 case KVM_CAP_ENABLE_CAP_VM:
380 case KVM_CAP_S390_IRQCHIP:
381 case KVM_CAP_VM_ATTRIBUTES:
382 case KVM_CAP_MP_STATE:
383 case KVM_CAP_IMMEDIATE_EXIT:
384 case KVM_CAP_S390_INJECT_IRQ:
385 case KVM_CAP_S390_USER_SIGP:
386 case KVM_CAP_S390_USER_STSI:
387 case KVM_CAP_S390_SKEYS:
388 case KVM_CAP_S390_IRQ_STATE:
389 case KVM_CAP_S390_USER_INSTR0:
390 case KVM_CAP_S390_CMMA_MIGRATION:
391 case KVM_CAP_S390_AIS:
392 r = 1;
393 break;
394 case KVM_CAP_S390_MEM_OP:
395 r = MEM_OP_MAX_SIZE;
396 break;
397 case KVM_CAP_NR_VCPUS:
398 case KVM_CAP_MAX_VCPUS:
399 r = KVM_S390_BSCA_CPU_SLOTS;
400 if (!kvm_s390_use_sca_entries())
401 r = KVM_MAX_VCPUS;
402 else if (sclp.has_esca && sclp.has_64bscao)
403 r = KVM_S390_ESCA_CPU_SLOTS;
404 break;
405 case KVM_CAP_NR_MEMSLOTS:
406 r = KVM_USER_MEM_SLOTS;
407 break;
408 case KVM_CAP_S390_COW:
409 r = MACHINE_HAS_ESOP;
410 break;
411 case KVM_CAP_S390_VECTOR_REGISTERS:
412 r = MACHINE_HAS_VX;
413 break;
414 case KVM_CAP_S390_RI:
415 r = test_facility(64);
416 break;
417 case KVM_CAP_S390_GS:
418 r = test_facility(133);
419 break;
420 default:
421 r = 0;
422 }
423 return r;
424 }
425
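/*
 * Editorial note (not in the original source): transfer the dirty state
 * of all pages in the memslot from the guest mapping into the memslot's
 * dirty bitmap, clearing the per-page dirty indication in the process.
 */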
426 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
427 struct kvm_memory_slot *memslot)
428 {
429 gfn_t cur_gfn, last_gfn;
430 unsigned long address;
431 struct gmap *gmap = kvm->arch.gmap;
432
433 /* Loop over all guest pages */
434 last_gfn = memslot->base_gfn + memslot->npages;
435 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
436 address = gfn_to_hva_memslot(memslot, cur_gfn);
437
438 if (test_and_clear_guest_dirty(gmap->mm, address))
439 mark_page_dirty(kvm, cur_gfn);
440 if (fatal_signal_pending(current))
441 return;
442 cond_resched();
443 }
444 }
445
446 /* Section: vm related */
447 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
448
449 /*
450 * Get (and clear) the dirty memory log for a memory slot.
451 */
452 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
453 struct kvm_dirty_log *log)
454 {
455 int r;
456 unsigned long n;
457 struct kvm_memslots *slots;
458 struct kvm_memory_slot *memslot;
459 int is_dirty = 0;
460
461 if (kvm_is_ucontrol(kvm))
462 return -EINVAL;
463
464 mutex_lock(&kvm->slots_lock);
465
466 r = -EINVAL;
467 if (log->slot >= KVM_USER_MEM_SLOTS)
468 goto out;
469
470 slots = kvm_memslots(kvm);
471 memslot = id_to_memslot(slots, log->slot);
472 r = -ENOENT;
473 if (!memslot->dirty_bitmap)
474 goto out;
475
476 kvm_s390_sync_dirty_log(kvm, memslot);
477 r = kvm_get_dirty_log(kvm, log, &is_dirty);
478 if (r)
479 goto out;
480
481 /* Clear the dirty log */
482 if (is_dirty) {
483 n = kvm_dirty_bitmap_bytes(memslot);
484 memset(memslot->dirty_bitmap, 0, n);
485 }
486 r = 0;
487 out:
488 mutex_unlock(&kvm->slots_lock);
489 return r;
490 }
491
492 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
493 {
494 unsigned int i;
495 struct kvm_vcpu *vcpu;
496
497 kvm_for_each_vcpu(i, vcpu, kvm) {
498 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
499 }
500 }
501
502 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
503 {
504 int r;
505
506 if (cap->flags)
507 return -EINVAL;
508
509 switch (cap->cap) {
510 case KVM_CAP_S390_IRQCHIP:
511 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
512 kvm->arch.use_irqchip = 1;
513 r = 0;
514 break;
515 case KVM_CAP_S390_USER_SIGP:
516 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
517 kvm->arch.user_sigp = 1;
518 r = 0;
519 break;
520 case KVM_CAP_S390_VECTOR_REGISTERS:
521 mutex_lock(&kvm->lock);
522 if (kvm->created_vcpus) {
523 r = -EBUSY;
524 } else if (MACHINE_HAS_VX) {
525 set_kvm_facility(kvm->arch.model.fac_mask, 129);
526 set_kvm_facility(kvm->arch.model.fac_list, 129);
527 if (test_facility(134)) {
528 set_kvm_facility(kvm->arch.model.fac_mask, 134);
529 set_kvm_facility(kvm->arch.model.fac_list, 134);
530 }
531 if (test_facility(135)) {
532 set_kvm_facility(kvm->arch.model.fac_mask, 135);
533 set_kvm_facility(kvm->arch.model.fac_list, 135);
534 }
535 r = 0;
536 } else
537 r = -EINVAL;
538 mutex_unlock(&kvm->lock);
539 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
540 r ? "(not available)" : "(success)");
541 break;
542 case KVM_CAP_S390_RI:
543 r = -EINVAL;
544 mutex_lock(&kvm->lock);
545 if (kvm->created_vcpus) {
546 r = -EBUSY;
547 } else if (test_facility(64)) {
548 set_kvm_facility(kvm->arch.model.fac_mask, 64);
549 set_kvm_facility(kvm->arch.model.fac_list, 64);
550 r = 0;
551 }
552 mutex_unlock(&kvm->lock);
553 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
554 r ? "(not available)" : "(success)");
555 break;
556 case KVM_CAP_S390_AIS:
557 mutex_lock(&kvm->lock);
558 if (kvm->created_vcpus) {
559 r = -EBUSY;
560 } else {
561 set_kvm_facility(kvm->arch.model.fac_mask, 72);
562 set_kvm_facility(kvm->arch.model.fac_list, 72);
563 r = 0;
564 }
565 mutex_unlock(&kvm->lock);
566 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
567 r ? "(not available)" : "(success)");
568 break;
569 case KVM_CAP_S390_GS:
570 r = -EINVAL;
571 mutex_lock(&kvm->lock);
572 if (atomic_read(&kvm->online_vcpus)) {
573 r = -EBUSY;
574 } else if (test_facility(133)) {
575 set_kvm_facility(kvm->arch.model.fac_mask, 133);
576 set_kvm_facility(kvm->arch.model.fac_list, 133);
577 r = 0;
578 }
579 mutex_unlock(&kvm->lock);
580 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
581 r ? "(not available)" : "(success)");
582 break;
583 case KVM_CAP_S390_USER_STSI:
584 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
585 kvm->arch.user_stsi = 1;
586 r = 0;
587 break;
588 case KVM_CAP_S390_USER_INSTR0:
589 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
590 kvm->arch.user_instr0 = 1;
591 icpt_operexc_on_all_vcpus(kvm);
592 r = 0;
593 break;
594 default:
595 r = -EINVAL;
596 break;
597 }
598 return r;
599 }
600
601 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
602 {
603 int ret;
604
605 switch (attr->attr) {
606 case KVM_S390_VM_MEM_LIMIT_SIZE:
607 ret = 0;
608 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
609 kvm->arch.mem_limit);
610 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
611 ret = -EFAULT;
612 break;
613 default:
614 ret = -ENXIO;
615 break;
616 }
617 return ret;
618 }
619
620 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
621 {
622 int ret;
623 unsigned int idx;
624 switch (attr->attr) {
625 case KVM_S390_VM_MEM_ENABLE_CMMA:
626 ret = -ENXIO;
627 if (!sclp.has_cmma)
628 break;
629
630 ret = -EBUSY;
631 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
632 mutex_lock(&kvm->lock);
633 if (!kvm->created_vcpus) {
634 kvm->arch.use_cmma = 1;
635 ret = 0;
636 }
637 mutex_unlock(&kvm->lock);
638 break;
639 case KVM_S390_VM_MEM_CLR_CMMA:
640 ret = -ENXIO;
641 if (!sclp.has_cmma)
642 break;
643 ret = -EINVAL;
644 if (!kvm->arch.use_cmma)
645 break;
646
647 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
648 mutex_lock(&kvm->lock);
649 idx = srcu_read_lock(&kvm->srcu);
650 s390_reset_cmma(kvm->arch.gmap->mm);
651 srcu_read_unlock(&kvm->srcu, idx);
652 mutex_unlock(&kvm->lock);
653 ret = 0;
654 break;
655 case KVM_S390_VM_MEM_LIMIT_SIZE: {
656 unsigned long new_limit;
657
658 if (kvm_is_ucontrol(kvm))
659 return -EINVAL;
660
661 if (get_user(new_limit, (u64 __user *)attr->addr))
662 return -EFAULT;
663
664 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
665 new_limit > kvm->arch.mem_limit)
666 return -E2BIG;
667
668 if (!new_limit)
669 return -EINVAL;
670
671 /* gmap_create takes last usable address */
672 if (new_limit != KVM_S390_NO_MEM_LIMIT)
673 new_limit -= 1;
674
675 ret = -EBUSY;
676 mutex_lock(&kvm->lock);
677 if (!kvm->created_vcpus) {
678 /* gmap_create will round the limit up */
679 struct gmap *new = gmap_create(current->mm, new_limit);
680
681 if (!new) {
682 ret = -ENOMEM;
683 } else {
684 gmap_remove(kvm->arch.gmap);
685 new->private = kvm;
686 kvm->arch.gmap = new;
687 ret = 0;
688 }
689 }
690 mutex_unlock(&kvm->lock);
691 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
692 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
693 (void *) kvm->arch.gmap->asce);
694 break;
695 }
696 default:
697 ret = -ENXIO;
698 break;
699 }
700 return ret;
701 }
702
703 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
704
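/*
 * Editorial note (not in the original source): enable or disable AES/DEA
 * protected-key wrapping for the VM.  Enabling generates fresh random
 * wrapping key masks, disabling clears them.  All vcpus are then
 * reconfigured and kicked out of SIE so the new crypto control block
 * contents take effect.
 */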
705 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
706 {
707 struct kvm_vcpu *vcpu;
708 int i;
709
710 if (!test_kvm_facility(kvm, 76))
711 return -EINVAL;
712
713 mutex_lock(&kvm->lock);
714 switch (attr->attr) {
715 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
716 get_random_bytes(
717 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
718 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
719 kvm->arch.crypto.aes_kw = 1;
720 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
721 break;
722 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
723 get_random_bytes(
724 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
725 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
726 kvm->arch.crypto.dea_kw = 1;
727 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
728 break;
729 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
730 kvm->arch.crypto.aes_kw = 0;
731 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
732 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
733 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
734 break;
735 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
736 kvm->arch.crypto.dea_kw = 0;
737 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
738 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
739 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
740 break;
741 default:
742 mutex_unlock(&kvm->lock);
743 return -ENXIO;
744 }
745
746 kvm_for_each_vcpu(i, vcpu, kvm) {
747 kvm_s390_vcpu_crypto_setup(vcpu);
748 exit_sie(vcpu);
749 }
750 mutex_unlock(&kvm->lock);
751 return 0;
752 }
753
754 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
755 {
756 int cx;
757 struct kvm_vcpu *vcpu;
758
759 kvm_for_each_vcpu(cx, vcpu, kvm)
760 kvm_s390_sync_request(req, vcpu);
761 }
762
763 /*
764 * Must be called with kvm->srcu held to avoid races on memslots, and with
765 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
766 */
767 static int kvm_s390_vm_start_migration(struct kvm *kvm)
768 {
769 struct kvm_s390_migration_state *mgs;
770 struct kvm_memory_slot *ms;
771 /* should be the only one */
772 struct kvm_memslots *slots;
773 unsigned long ram_pages;
774 int slotnr;
775
776 /* migration mode already enabled */
777 if (kvm->arch.migration_state)
778 return 0;
779
780 slots = kvm_memslots(kvm);
781 if (!slots || !slots->used_slots)
782 return -EINVAL;
783
784 mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
785 if (!mgs)
786 return -ENOMEM;
787 kvm->arch.migration_state = mgs;
788
789 if (kvm->arch.use_cmma) {
790 /*
791 * Get the last slot. They should be sorted by base_gfn, so the
792 * last slot is also the one at the end of the address space.
793 * We have verified above that at least one slot is present.
794 */
795 ms = slots->memslots + slots->used_slots - 1;
796 /* round up so we only use full longs */
797 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
798 /* allocate enough bytes to store all the bits */
799 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
800 if (!mgs->pgste_bitmap) {
801 kfree(mgs);
802 kvm->arch.migration_state = NULL;
803 return -ENOMEM;
804 }
805
806 mgs->bitmap_size = ram_pages;
807 atomic64_set(&mgs->dirty_pages, ram_pages);
808 /* mark all the pages in active slots as dirty */
809 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
810 ms = slots->memslots + slotnr;
811 bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
812 }
813
814 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
815 }
816 return 0;
817 }
818
819 /*
820 * Must be called with kvm->lock to avoid races with ourselves and
821 * kvm_s390_vm_start_migration.
822 */
823 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
824 {
825 struct kvm_s390_migration_state *mgs;
826
827 /* migration mode already disabled */
828 if (!kvm->arch.migration_state)
829 return 0;
830 mgs = kvm->arch.migration_state;
831 kvm->arch.migration_state = NULL;
832
833 if (kvm->arch.use_cmma) {
834 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
835 vfree(mgs->pgste_bitmap);
836 }
837 kfree(mgs);
838 return 0;
839 }
840
841 static int kvm_s390_vm_set_migration(struct kvm *kvm,
842 struct kvm_device_attr *attr)
843 {
844 int idx, res = -ENXIO;
845
846 mutex_lock(&kvm->lock);
847 switch (attr->attr) {
848 case KVM_S390_VM_MIGRATION_START:
849 idx = srcu_read_lock(&kvm->srcu);
850 res = kvm_s390_vm_start_migration(kvm);
851 srcu_read_unlock(&kvm->srcu, idx);
852 break;
853 case KVM_S390_VM_MIGRATION_STOP:
854 res = kvm_s390_vm_stop_migration(kvm);
855 break;
856 default:
857 break;
858 }
859 mutex_unlock(&kvm->lock);
860
861 return res;
862 }
863
864 static int kvm_s390_vm_get_migration(struct kvm *kvm,
865 struct kvm_device_attr *attr)
866 {
867 u64 mig = (kvm->arch.migration_state != NULL);
868
869 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
870 return -ENXIO;
871
872 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
873 return -EFAULT;
874 return 0;
875 }
876
877 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
878 {
879 u8 gtod_high;
880
881 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
882 sizeof(gtod_high)))
883 return -EFAULT;
884
885 if (gtod_high != 0)
886 return -EINVAL;
887 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
888
889 return 0;
890 }
891
892 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
893 {
894 u64 gtod;
895
896 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
897 return -EFAULT;
898
899 kvm_s390_set_tod_clock(kvm, gtod);
900 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
901 return 0;
902 }
903
904 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
905 {
906 int ret;
907
908 if (attr->flags)
909 return -EINVAL;
910
911 switch (attr->attr) {
912 case KVM_S390_VM_TOD_HIGH:
913 ret = kvm_s390_set_tod_high(kvm, attr);
914 break;
915 case KVM_S390_VM_TOD_LOW:
916 ret = kvm_s390_set_tod_low(kvm, attr);
917 break;
918 default:
919 ret = -ENXIO;
920 break;
921 }
922 return ret;
923 }
924
925 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
926 {
927 u8 gtod_high = 0;
928
929 if (copy_to_user((void __user *)attr->addr, &gtod_high,
930 sizeof(gtod_high)))
931 return -EFAULT;
932 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
933
934 return 0;
935 }
936
937 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
938 {
939 u64 gtod;
940
941 gtod = kvm_s390_get_tod_clock_fast(kvm);
942 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
943 return -EFAULT;
944 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
945
946 return 0;
947 }
948
949 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
950 {
951 int ret;
952
953 if (attr->flags)
954 return -EINVAL;
955
956 switch (attr->attr) {
957 case KVM_S390_VM_TOD_HIGH:
958 ret = kvm_s390_get_tod_high(kvm, attr);
959 break;
960 case KVM_S390_VM_TOD_LOW:
961 ret = kvm_s390_get_tod_low(kvm, attr);
962 break;
963 default:
964 ret = -ENXIO;
965 break;
966 }
967 return ret;
968 }
969
970 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
971 {
972 struct kvm_s390_vm_cpu_processor *proc;
973 u16 lowest_ibc, unblocked_ibc;
974 int ret = 0;
975
976 mutex_lock(&kvm->lock);
977 if (kvm->created_vcpus) {
978 ret = -EBUSY;
979 goto out;
980 }
981 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
982 if (!proc) {
983 ret = -ENOMEM;
984 goto out;
985 }
986 if (!copy_from_user(proc, (void __user *)attr->addr,
987 sizeof(*proc))) {
988 kvm->arch.model.cpuid = proc->cpuid;
989 lowest_ibc = sclp.ibc >> 16 & 0xfff;
990 unblocked_ibc = sclp.ibc & 0xfff;
991 if (lowest_ibc && proc->ibc) {
992 if (proc->ibc > unblocked_ibc)
993 kvm->arch.model.ibc = unblocked_ibc;
994 else if (proc->ibc < lowest_ibc)
995 kvm->arch.model.ibc = lowest_ibc;
996 else
997 kvm->arch.model.ibc = proc->ibc;
998 }
999 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1000 S390_ARCH_FAC_LIST_SIZE_BYTE);
1001 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1002 kvm->arch.model.ibc,
1003 kvm->arch.model.cpuid);
1004 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1005 kvm->arch.model.fac_list[0],
1006 kvm->arch.model.fac_list[1],
1007 kvm->arch.model.fac_list[2]);
1008 } else
1009 ret = -EFAULT;
1010 kfree(proc);
1011 out:
1012 mutex_unlock(&kvm->lock);
1013 return ret;
1014 }
1015
1016 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1017 struct kvm_device_attr *attr)
1018 {
1019 struct kvm_s390_vm_cpu_feat data;
1020 int ret = -EBUSY;
1021
1022 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1023 return -EFAULT;
1024 if (!bitmap_subset((unsigned long *) data.feat,
1025 kvm_s390_available_cpu_feat,
1026 KVM_S390_VM_CPU_FEAT_NR_BITS))
1027 return -EINVAL;
1028
1029 mutex_lock(&kvm->lock);
1030 if (!atomic_read(&kvm->online_vcpus)) {
1031 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1032 KVM_S390_VM_CPU_FEAT_NR_BITS);
1033 ret = 0;
1034 }
1035 mutex_unlock(&kvm->lock);
1036 return ret;
1037 }
1038
1039 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1040 struct kvm_device_attr *attr)
1041 {
1042 /*
1043 * Once supported by kernel + hw, we have to store the subfunctions
1044 * in kvm->arch and remember that user space configured them.
1045 */
1046 return -ENXIO;
1047 }
1048
1049 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1050 {
1051 int ret = -ENXIO;
1052
1053 switch (attr->attr) {
1054 case KVM_S390_VM_CPU_PROCESSOR:
1055 ret = kvm_s390_set_processor(kvm, attr);
1056 break;
1057 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1058 ret = kvm_s390_set_processor_feat(kvm, attr);
1059 break;
1060 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1061 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1062 break;
1063 }
1064 return ret;
1065 }
1066
1067 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1068 {
1069 struct kvm_s390_vm_cpu_processor *proc;
1070 int ret = 0;
1071
1072 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1073 if (!proc) {
1074 ret = -ENOMEM;
1075 goto out;
1076 }
1077 proc->cpuid = kvm->arch.model.cpuid;
1078 proc->ibc = kvm->arch.model.ibc;
1079 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1080 S390_ARCH_FAC_LIST_SIZE_BYTE);
1081 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1082 kvm->arch.model.ibc,
1083 kvm->arch.model.cpuid);
1084 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1085 kvm->arch.model.fac_list[0],
1086 kvm->arch.model.fac_list[1],
1087 kvm->arch.model.fac_list[2]);
1088 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1089 ret = -EFAULT;
1090 kfree(proc);
1091 out:
1092 return ret;
1093 }
1094
1095 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1096 {
1097 struct kvm_s390_vm_cpu_machine *mach;
1098 int ret = 0;
1099
1100 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1101 if (!mach) {
1102 ret = -ENOMEM;
1103 goto out;
1104 }
1105 get_cpu_id((struct cpuid *) &mach->cpuid);
1106 mach->ibc = sclp.ibc;
1107 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1108 S390_ARCH_FAC_LIST_SIZE_BYTE);
1109 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1110 sizeof(S390_lowcore.stfle_fac_list));
1111 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1112 kvm->arch.model.ibc,
1113 kvm->arch.model.cpuid);
1114 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1115 mach->fac_mask[0],
1116 mach->fac_mask[1],
1117 mach->fac_mask[2]);
1118 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1119 mach->fac_list[0],
1120 mach->fac_list[1],
1121 mach->fac_list[2]);
1122 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1123 ret = -EFAULT;
1124 kfree(mach);
1125 out:
1126 return ret;
1127 }
1128
1129 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1130 struct kvm_device_attr *attr)
1131 {
1132 struct kvm_s390_vm_cpu_feat data;
1133
1134 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1135 KVM_S390_VM_CPU_FEAT_NR_BITS);
1136 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1137 return -EFAULT;
1138 return 0;
1139 }
1140
1141 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1142 struct kvm_device_attr *attr)
1143 {
1144 struct kvm_s390_vm_cpu_feat data;
1145
1146 bitmap_copy((unsigned long *) data.feat,
1147 kvm_s390_available_cpu_feat,
1148 KVM_S390_VM_CPU_FEAT_NR_BITS);
1149 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1150 return -EFAULT;
1151 return 0;
1152 }
1153
1154 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1155 struct kvm_device_attr *attr)
1156 {
1157 /*
1158 * Once we can actually configure subfunctions (kernel + hw support),
1159 * we have to check if they were already set by user space, if so copy
1160 * them from kvm->arch.
1161 */
1162 return -ENXIO;
1163 }
1164
1165 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1166 struct kvm_device_attr *attr)
1167 {
1168 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1169 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1170 return -EFAULT;
1171 return 0;
1172 }
1173 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1174 {
1175 int ret = -ENXIO;
1176
1177 switch (attr->attr) {
1178 case KVM_S390_VM_CPU_PROCESSOR:
1179 ret = kvm_s390_get_processor(kvm, attr);
1180 break;
1181 case KVM_S390_VM_CPU_MACHINE:
1182 ret = kvm_s390_get_machine(kvm, attr);
1183 break;
1184 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1185 ret = kvm_s390_get_processor_feat(kvm, attr);
1186 break;
1187 case KVM_S390_VM_CPU_MACHINE_FEAT:
1188 ret = kvm_s390_get_machine_feat(kvm, attr);
1189 break;
1190 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1191 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1192 break;
1193 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1194 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1195 break;
1196 }
1197 return ret;
1198 }
1199
1200 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1201 {
1202 int ret;
1203
1204 switch (attr->group) {
1205 case KVM_S390_VM_MEM_CTRL:
1206 ret = kvm_s390_set_mem_control(kvm, attr);
1207 break;
1208 case KVM_S390_VM_TOD:
1209 ret = kvm_s390_set_tod(kvm, attr);
1210 break;
1211 case KVM_S390_VM_CPU_MODEL:
1212 ret = kvm_s390_set_cpu_model(kvm, attr);
1213 break;
1214 case KVM_S390_VM_CRYPTO:
1215 ret = kvm_s390_vm_set_crypto(kvm, attr);
1216 break;
1217 case KVM_S390_VM_MIGRATION:
1218 ret = kvm_s390_vm_set_migration(kvm, attr);
1219 break;
1220 default:
1221 ret = -ENXIO;
1222 break;
1223 }
1224
1225 return ret;
1226 }
1227
1228 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1229 {
1230 int ret;
1231
1232 switch (attr->group) {
1233 case KVM_S390_VM_MEM_CTRL:
1234 ret = kvm_s390_get_mem_control(kvm, attr);
1235 break;
1236 case KVM_S390_VM_TOD:
1237 ret = kvm_s390_get_tod(kvm, attr);
1238 break;
1239 case KVM_S390_VM_CPU_MODEL:
1240 ret = kvm_s390_get_cpu_model(kvm, attr);
1241 break;
1242 case KVM_S390_VM_MIGRATION:
1243 ret = kvm_s390_vm_get_migration(kvm, attr);
1244 break;
1245 default:
1246 ret = -ENXIO;
1247 break;
1248 }
1249
1250 return ret;
1251 }
1252
1253 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1254 {
1255 int ret;
1256
1257 switch (attr->group) {
1258 case KVM_S390_VM_MEM_CTRL:
1259 switch (attr->attr) {
1260 case KVM_S390_VM_MEM_ENABLE_CMMA:
1261 case KVM_S390_VM_MEM_CLR_CMMA:
1262 ret = sclp.has_cmma ? 0 : -ENXIO;
1263 break;
1264 case KVM_S390_VM_MEM_LIMIT_SIZE:
1265 ret = 0;
1266 break;
1267 default:
1268 ret = -ENXIO;
1269 break;
1270 }
1271 break;
1272 case KVM_S390_VM_TOD:
1273 switch (attr->attr) {
1274 case KVM_S390_VM_TOD_LOW:
1275 case KVM_S390_VM_TOD_HIGH:
1276 ret = 0;
1277 break;
1278 default:
1279 ret = -ENXIO;
1280 break;
1281 }
1282 break;
1283 case KVM_S390_VM_CPU_MODEL:
1284 switch (attr->attr) {
1285 case KVM_S390_VM_CPU_PROCESSOR:
1286 case KVM_S390_VM_CPU_MACHINE:
1287 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1288 case KVM_S390_VM_CPU_MACHINE_FEAT:
1289 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1290 ret = 0;
1291 break;
1292 /* configuring subfunctions is not supported yet */
1293 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1294 default:
1295 ret = -ENXIO;
1296 break;
1297 }
1298 break;
1299 case KVM_S390_VM_CRYPTO:
1300 switch (attr->attr) {
1301 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1302 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1303 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1304 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1305 ret = 0;
1306 break;
1307 default:
1308 ret = -ENXIO;
1309 break;
1310 }
1311 break;
1312 case KVM_S390_VM_MIGRATION:
1313 ret = 0;
1314 break;
1315 default:
1316 ret = -ENXIO;
1317 break;
1318 }
1319
1320 return ret;
1321 }
1322
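/*
 * Editorial note (not in the original source): copy the guest storage
 * keys for a range of guest frames to user space.  If the guest has
 * never made use of storage keys, KVM_S390_GET_SKEYS_NONE is returned
 * instead.
 */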
1323 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1324 {
1325 uint8_t *keys;
1326 uint64_t hva;
1327 int srcu_idx, i, r = 0;
1328
1329 if (args->flags != 0)
1330 return -EINVAL;
1331
1332 /* Is this guest using storage keys? */
1333 if (!mm_use_skey(current->mm))
1334 return KVM_S390_GET_SKEYS_NONE;
1335
1336 /* Enforce sane limit on memory allocation */
1337 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1338 return -EINVAL;
1339
1340 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1341 if (!keys)
1342 return -ENOMEM;
1343
1344 down_read(&current->mm->mmap_sem);
1345 srcu_idx = srcu_read_lock(&kvm->srcu);
1346 for (i = 0; i < args->count; i++) {
1347 hva = gfn_to_hva(kvm, args->start_gfn + i);
1348 if (kvm_is_error_hva(hva)) {
1349 r = -EFAULT;
1350 break;
1351 }
1352
1353 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1354 if (r)
1355 break;
1356 }
1357 srcu_read_unlock(&kvm->srcu, srcu_idx);
1358 up_read(&current->mm->mmap_sem);
1359
1360 if (!r) {
1361 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1362 sizeof(uint8_t) * args->count);
1363 if (r)
1364 r = -EFAULT;
1365 }
1366
1367 kvfree(keys);
1368 return r;
1369 }
1370
1371 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1372 {
1373 uint8_t *keys;
1374 uint64_t hva;
1375 int srcu_idx, i, r = 0;
1376
1377 if (args->flags != 0)
1378 return -EINVAL;
1379
1380 /* Enforce sane limit on memory allocation */
1381 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1382 return -EINVAL;
1383
1384 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1385 if (!keys)
1386 return -ENOMEM;
1387
1388 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1389 sizeof(uint8_t) * args->count);
1390 if (r) {
1391 r = -EFAULT;
1392 goto out;
1393 }
1394
1395 /* Enable storage key handling for the guest */
1396 r = s390_enable_skey();
1397 if (r)
1398 goto out;
1399
1400 down_read(&current->mm->mmap_sem);
1401 srcu_idx = srcu_read_lock(&kvm->srcu);
1402 for (i = 0; i < args->count; i++) {
1403 hva = gfn_to_hva(kvm, args->start_gfn + i);
1404 if (kvm_is_error_hva(hva)) {
1405 r = -EFAULT;
1406 break;
1407 }
1408
1409 /* Lowest order bit is reserved */
1410 if (keys[i] & 0x01) {
1411 r = -EINVAL;
1412 break;
1413 }
1414
1415 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1416 if (r)
1417 break;
1418 }
1419 srcu_read_unlock(&kvm->srcu, srcu_idx);
1420 up_read(&current->mm->mmap_sem);
1421 out:
1422 kvfree(keys);
1423 return r;
1424 }
1425
1426 /*
1427 * Base address and length must be sent at the start of each block, therefore
1428 * it's cheaper to send some clean data, as long as it's less than the size of
1429 * two longs.
1430 */
1431 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1432 /* for consistency */
1433 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1434
1435 /*
1436 * This function searches for the next page with dirty CMMA attributes, and
1437 * saves the attributes in the buffer up to either the end of the buffer or
1438 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1439 * no trailing clean bytes are saved.
1440 * In case no dirty bits were found, or if CMMA was not enabled or used, the
1441 * output buffer will indicate 0 as length.
1442 */
1443 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1444 struct kvm_s390_cmma_log *args)
1445 {
1446 struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1447 unsigned long bufsize, hva, pgstev, i, next, cur;
1448 int srcu_idx, peek, r = 0, rr;
1449 u8 *res;
1450
1451 cur = args->start_gfn;
1452 i = next = pgstev = 0;
1453
1454 if (unlikely(!kvm->arch.use_cmma))
1455 return -ENXIO;
1456 /* Invalid/unsupported flags were specified */
1457 if (args->flags & ~KVM_S390_CMMA_PEEK)
1458 return -EINVAL;
1459 /* Migration mode query, and we are not doing a migration */
1460 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1461 if (!peek && !s)
1462 return -EINVAL;
1463 /* CMMA is disabled or was not used, or the buffer has length zero */
1464 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1465 if (!bufsize || !kvm->mm->context.use_cmma) {
1466 memset(args, 0, sizeof(*args));
1467 return 0;
1468 }
1469
1470 if (!peek) {
1471 /* We are not peeking, and there are no dirty pages */
1472 if (!atomic64_read(&s->dirty_pages)) {
1473 memset(args, 0, sizeof(*args));
1474 return 0;
1475 }
1476 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1477 args->start_gfn);
1478 if (cur >= s->bitmap_size) /* nothing found, loop back */
1479 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1480 if (cur >= s->bitmap_size) { /* again! (very unlikely) */
1481 memset(args, 0, sizeof(*args));
1482 return 0;
1483 }
1484 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1485 }
1486
1487 res = vmalloc(bufsize);
1488 if (!res)
1489 return -ENOMEM;
1490
1491 args->start_gfn = cur;
1492
1493 down_read(&kvm->mm->mmap_sem);
1494 srcu_idx = srcu_read_lock(&kvm->srcu);
1495 while (i < bufsize) {
1496 hva = gfn_to_hva(kvm, cur);
1497 if (kvm_is_error_hva(hva)) {
1498 r = -EFAULT;
1499 break;
1500 }
1501 /* decrement only if we actually flipped the bit to 0 */
1502 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1503 atomic64_dec(&s->dirty_pages);
1504 r = get_pgste(kvm->mm, hva, &pgstev);
1505 if (r < 0)
1506 pgstev = 0;
1507 /* save the value */
1508 res[i++] = (pgstev >> 24) & 0x3;
1509 /*
1510 * if the next bit is too far away, stop.
1511 * if we reached the previous "next", find the next one
1512 */
1513 if (!peek) {
1514 if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1515 break;
1516 if (cur == next)
1517 next = find_next_bit(s->pgste_bitmap,
1518 s->bitmap_size, cur + 1);
1519 /* reached the end of the bitmap or of the buffer, stop */
1520 if ((next >= s->bitmap_size) ||
1521 (next >= args->start_gfn + bufsize))
1522 break;
1523 }
1524 cur++;
1525 }
1526 srcu_read_unlock(&kvm->srcu, srcu_idx);
1527 up_read(&kvm->mm->mmap_sem);
1528 args->count = i;
1529 args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1530
1531 rr = copy_to_user((void __user *)args->values, res, args->count);
1532 if (rr)
1533 r = -EFAULT;
1534
1535 vfree(res);
1536 return r;
1537 }
1538
1539 /*
1540 * This function sets the CMMA attributes for the given pages. If the input
1541 * buffer has zero length, no action is taken, otherwise the attributes are
1542 * set and the mm->context.use_cmma flag is set.
1543 */
1544 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1545 const struct kvm_s390_cmma_log *args)
1546 {
1547 unsigned long hva, mask, pgstev, i;
1548 uint8_t *bits;
1549 int srcu_idx, r = 0;
1550
1551 mask = args->mask;
1552
1553 if (!kvm->arch.use_cmma)
1554 return -ENXIO;
1555 /* invalid/unsupported flags */
1556 if (args->flags != 0)
1557 return -EINVAL;
1558 /* Enforce sane limit on memory allocation */
1559 if (args->count > KVM_S390_CMMA_SIZE_MAX)
1560 return -EINVAL;
1561 /* Nothing to do */
1562 if (args->count == 0)
1563 return 0;
1564
1565 bits = vmalloc(sizeof(*bits) * args->count);
1566 if (!bits)
1567 return -ENOMEM;
1568
1569 r = copy_from_user(bits, (void __user *)args->values, args->count);
1570 if (r) {
1571 r = -EFAULT;
1572 goto out;
1573 }
1574
1575 down_read(&kvm->mm->mmap_sem);
1576 srcu_idx = srcu_read_lock(&kvm->srcu);
1577 for (i = 0; i < args->count; i++) {
1578 hva = gfn_to_hva(kvm, args->start_gfn + i);
1579 if (kvm_is_error_hva(hva)) {
1580 r = -EFAULT;
1581 break;
1582 }
1583
1584 pgstev = bits[i];
1585 pgstev = pgstev << 24;
1586 mask &= _PGSTE_GPS_USAGE_MASK;
1587 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1588 }
1589 srcu_read_unlock(&kvm->srcu, srcu_idx);
1590 up_read(&kvm->mm->mmap_sem);
1591
1592 if (!kvm->mm->context.use_cmma) {
1593 down_write(&kvm->mm->mmap_sem);
1594 kvm->mm->context.use_cmma = 1;
1595 up_write(&kvm->mm->mmap_sem);
1596 }
1597 out:
1598 vfree(bits);
1599 return r;
1600 }
1601
1602 long kvm_arch_vm_ioctl(struct file *filp,
1603 unsigned int ioctl, unsigned long arg)
1604 {
1605 struct kvm *kvm = filp->private_data;
1606 void __user *argp = (void __user *)arg;
1607 struct kvm_device_attr attr;
1608 int r;
1609
1610 switch (ioctl) {
1611 case KVM_S390_INTERRUPT: {
1612 struct kvm_s390_interrupt s390int;
1613
1614 r = -EFAULT;
1615 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1616 break;
1617 r = kvm_s390_inject_vm(kvm, &s390int);
1618 break;
1619 }
1620 case KVM_ENABLE_CAP: {
1621 struct kvm_enable_cap cap;
1622 r = -EFAULT;
1623 if (copy_from_user(&cap, argp, sizeof(cap)))
1624 break;
1625 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1626 break;
1627 }
1628 case KVM_CREATE_IRQCHIP: {
1629 struct kvm_irq_routing_entry routing;
1630
1631 r = -EINVAL;
1632 if (kvm->arch.use_irqchip) {
1633 /* Set up dummy routing. */
1634 memset(&routing, 0, sizeof(routing));
1635 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1636 }
1637 break;
1638 }
1639 case KVM_SET_DEVICE_ATTR: {
1640 r = -EFAULT;
1641 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1642 break;
1643 r = kvm_s390_vm_set_attr(kvm, &attr);
1644 break;
1645 }
1646 case KVM_GET_DEVICE_ATTR: {
1647 r = -EFAULT;
1648 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1649 break;
1650 r = kvm_s390_vm_get_attr(kvm, &attr);
1651 break;
1652 }
1653 case KVM_HAS_DEVICE_ATTR: {
1654 r = -EFAULT;
1655 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1656 break;
1657 r = kvm_s390_vm_has_attr(kvm, &attr);
1658 break;
1659 }
1660 case KVM_S390_GET_SKEYS: {
1661 struct kvm_s390_skeys args;
1662
1663 r = -EFAULT;
1664 if (copy_from_user(&args, argp,
1665 sizeof(struct kvm_s390_skeys)))
1666 break;
1667 r = kvm_s390_get_skeys(kvm, &args);
1668 break;
1669 }
1670 case KVM_S390_SET_SKEYS: {
1671 struct kvm_s390_skeys args;
1672
1673 r = -EFAULT;
1674 if (copy_from_user(&args, argp,
1675 sizeof(struct kvm_s390_skeys)))
1676 break;
1677 r = kvm_s390_set_skeys(kvm, &args);
1678 break;
1679 }
1680 case KVM_S390_GET_CMMA_BITS: {
1681 struct kvm_s390_cmma_log args;
1682
1683 r = -EFAULT;
1684 if (copy_from_user(&args, argp, sizeof(args)))
1685 break;
1686 r = kvm_s390_get_cmma_bits(kvm, &args);
1687 if (!r) {
1688 r = copy_to_user(argp, &args, sizeof(args));
1689 if (r)
1690 r = -EFAULT;
1691 }
1692 break;
1693 }
1694 case KVM_S390_SET_CMMA_BITS: {
1695 struct kvm_s390_cmma_log args;
1696
1697 r = -EFAULT;
1698 if (copy_from_user(&args, argp, sizeof(args)))
1699 break;
1700 r = kvm_s390_set_cmma_bits(kvm, &args);
1701 break;
1702 }
1703 default:
1704 r = -ENOTTY;
1705 }
1706
1707 return r;
1708 }
1709
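/*
 * Editorial note (not in the original source): execute PQAP(QCI) to
 * retrieve the AP (Adjunct Processor) configuration information into the
 * 128-byte buffer at @config.  Returns the condition code of the
 * instruction, 0 on success.
 */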
1710 static int kvm_s390_query_ap_config(u8 *config)
1711 {
1712 u32 fcn_code = 0x04000000UL;
1713 u32 cc = 0;
1714
1715 memset(config, 0, 128);
1716 asm volatile(
1717 "lgr 0,%1\n"
1718 "lgr 2,%2\n"
1719 ".long 0xb2af0000\n" /* PQAP(QCI) */
1720 "0: ipm %0\n"
1721 "srl %0,28\n"
1722 "1:\n"
1723 EX_TABLE(0b, 1b)
1724 : "+r" (cc)
1725 : "r" (fcn_code), "r" (config)
1726 : "cc", "0", "2", "memory"
1727 );
1728
1729 return cc;
1730 }
1731
1732 static int kvm_s390_apxa_installed(void)
1733 {
1734 u8 config[128];
1735 int cc;
1736
1737 if (test_facility(12)) {
1738 cc = kvm_s390_query_ap_config(config);
1739
1740 if (cc)
1741 pr_err("PQAP(QCI) failed with cc=%d", cc);
1742 else
1743 return config[0] & 0x40;
1744 }
1745
1746 return 0;
1747 }
1748
1749 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1750 {
1751 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1752
1753 if (kvm_s390_apxa_installed())
1754 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1755 else
1756 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1757 }
1758
1759 static u64 kvm_s390_get_initial_cpuid(void)
1760 {
1761 struct cpuid cpuid;
1762
1763 get_cpu_id(&cpuid);
1764 cpuid.version = 0xff;
1765 return *((u64 *) &cpuid);
1766 }
1767
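/*
 * Editorial note (not in the original source): set up the guest's crypto
 * control block: choose the CRYCB format depending on whether APXA is
 * installed and, by default, enable the AES and DEA protected-key
 * functions with freshly generated random wrapping keys.
 */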
1768 static void kvm_s390_crypto_init(struct kvm *kvm)
1769 {
1770 if (!test_kvm_facility(kvm, 76))
1771 return;
1772
1773 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1774 kvm_s390_set_crycb_format(kvm);
1775
1776 /* Enable AES/DEA protected key functions by default */
1777 kvm->arch.crypto.aes_kw = 1;
1778 kvm->arch.crypto.dea_kw = 1;
1779 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1780 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1781 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1782 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1783 }
1784
1785 static void sca_dispose(struct kvm *kvm)
1786 {
1787 if (kvm->arch.use_esca)
1788 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1789 else
1790 free_page((unsigned long)(kvm->arch.sca));
1791 kvm->arch.sca = NULL;
1792 }
1793
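/*
 * Editorial note (not in the original source): allocate and initialize
 * the architecture specific parts of a new VM: the basic SCA (placed at
 * a rotating offset within its page), the per-VM debug feature, the
 * facility mask/list in sie_page2, the crypto control block, the
 * floating interrupt state and - unless this is a ucontrol VM - the
 * guest address space (gmap).
 */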
1794 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1795 {
1796 gfp_t alloc_flags = GFP_KERNEL;
1797 int i, rc;
1798 char debug_name[16];
1799 static unsigned long sca_offset;
1800
1801 rc = -EINVAL;
1802 #ifdef CONFIG_KVM_S390_UCONTROL
1803 if (type & ~KVM_VM_S390_UCONTROL)
1804 goto out_err;
1805 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1806 goto out_err;
1807 #else
1808 if (type)
1809 goto out_err;
1810 #endif
1811
1812 rc = s390_enable_sie();
1813 if (rc)
1814 goto out_err;
1815
1816 rc = -ENOMEM;
1817
1818 ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1819
1820 kvm->arch.use_esca = 0; /* start with basic SCA */
1821 if (!sclp.has_64bscao)
1822 alloc_flags |= GFP_DMA;
1823 rwlock_init(&kvm->arch.sca_lock);
1824 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1825 if (!kvm->arch.sca)
1826 goto out_err;
1827 spin_lock(&kvm_lock);
1828 sca_offset += 16;
1829 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1830 sca_offset = 0;
1831 kvm->arch.sca = (struct bsca_block *)
1832 ((char *) kvm->arch.sca + sca_offset);
1833 spin_unlock(&kvm_lock);
1834
1835 sprintf(debug_name, "kvm-%u", current->pid);
1836
1837 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1838 if (!kvm->arch.dbf)
1839 goto out_err;
1840
1841 kvm->arch.sie_page2 =
1842 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1843 if (!kvm->arch.sie_page2)
1844 goto out_err;
1845
1846 /* Populate the facility mask initially. */
1847 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1848 sizeof(S390_lowcore.stfle_fac_list));
1849 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1850 if (i < kvm_s390_fac_list_mask_size())
1851 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1852 else
1853 kvm->arch.model.fac_mask[i] = 0UL;
1854 }
1855
1856 /* Populate the facility list initially. */
1857 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1858 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1859 S390_ARCH_FAC_LIST_SIZE_BYTE);
1860
1861 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1862 set_kvm_facility(kvm->arch.model.fac_list, 74);
1863
1864 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1865 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1866
1867 kvm_s390_crypto_init(kvm);
1868
1869 mutex_init(&kvm->arch.float_int.ais_lock);
1870 kvm->arch.float_int.simm = 0;
1871 kvm->arch.float_int.nimm = 0;
1872 spin_lock_init(&kvm->arch.float_int.lock);
1873 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1874 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1875 init_waitqueue_head(&kvm->arch.ipte_wq);
1876 mutex_init(&kvm->arch.ipte_mutex);
1877
1878 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1879 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1880
1881 if (type & KVM_VM_S390_UCONTROL) {
1882 kvm->arch.gmap = NULL;
1883 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1884 } else {
1885 if (sclp.hamax == U64_MAX)
1886 kvm->arch.mem_limit = TASK_SIZE_MAX;
1887 else
1888 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1889 sclp.hamax + 1);
1890 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1891 if (!kvm->arch.gmap)
1892 goto out_err;
1893 kvm->arch.gmap->private = kvm;
1894 kvm->arch.gmap->pfault_enabled = 0;
1895 }
1896
1897 kvm->arch.css_support = 0;
1898 kvm->arch.use_irqchip = 0;
1899 kvm->arch.epoch = 0;
1900
1901 spin_lock_init(&kvm->arch.start_stop_lock);
1902 kvm_s390_vsie_init(kvm);
1903 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1904
1905 return 0;
1906 out_err:
1907 free_page((unsigned long)kvm->arch.sie_page2);
1908 debug_unregister(kvm->arch.dbf);
1909 sca_dispose(kvm);
1910 KVM_EVENT(3, "creation of vm failed: %d", rc);
1911 return rc;
1912 }
1913
1914 bool kvm_arch_has_vcpu_debugfs(void)
1915 {
1916 return false;
1917 }
1918
1919 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1920 {
1921 return 0;
1922 }
1923
1924 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1925 {
1926 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1927 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1928 kvm_s390_clear_local_irqs(vcpu);
1929 kvm_clear_async_pf_completion_queue(vcpu);
1930 if (!kvm_is_ucontrol(vcpu->kvm))
1931 sca_del_vcpu(vcpu);
1932
1933 if (kvm_is_ucontrol(vcpu->kvm))
1934 gmap_remove(vcpu->arch.gmap);
1935
1936 if (vcpu->kvm->arch.use_cmma)
1937 kvm_s390_vcpu_unsetup_cmma(vcpu);
1938 free_page((unsigned long)(vcpu->arch.sie_block));
1939
1940 kvm_vcpu_uninit(vcpu);
1941 kmem_cache_free(kvm_vcpu_cache, vcpu);
1942 }
1943
1944 static void kvm_free_vcpus(struct kvm *kvm)
1945 {
1946 unsigned int i;
1947 struct kvm_vcpu *vcpu;
1948
1949 kvm_for_each_vcpu(i, vcpu, kvm)
1950 kvm_arch_vcpu_destroy(vcpu);
1951
1952 mutex_lock(&kvm->lock);
1953 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1954 kvm->vcpus[i] = NULL;
1955
1956 atomic_set(&kvm->online_vcpus, 0);
1957 mutex_unlock(&kvm->lock);
1958 }
1959
1960 void kvm_arch_destroy_vm(struct kvm *kvm)
1961 {
1962 kvm_free_vcpus(kvm);
1963 sca_dispose(kvm);
1964 debug_unregister(kvm->arch.dbf);
1965 free_page((unsigned long)kvm->arch.sie_page2);
1966 if (!kvm_is_ucontrol(kvm))
1967 gmap_remove(kvm->arch.gmap);
1968 kvm_s390_destroy_adapters(kvm);
1969 kvm_s390_clear_float_irqs(kvm);
1970 kvm_s390_vsie_destroy(kvm);
1971 if (kvm->arch.migration_state) {
1972 vfree(kvm->arch.migration_state->pgste_bitmap);
1973 kfree(kvm->arch.migration_state);
1974 }
1975 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1976 }
1977
1978 /* Section: vcpu related */
1979 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1980 {
1981 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1982 if (!vcpu->arch.gmap)
1983 return -ENOMEM;
1984 vcpu->arch.gmap->private = vcpu->kvm;
1985
1986 return 0;
1987 }
1988
1989 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1990 {
1991 if (!kvm_s390_use_sca_entries())
1992 return;
1993 read_lock(&vcpu->kvm->arch.sca_lock);
1994 if (vcpu->kvm->arch.use_esca) {
1995 struct esca_block *sca = vcpu->kvm->arch.sca;
1996
1997 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1998 sca->cpu[vcpu->vcpu_id].sda = 0;
1999 } else {
2000 struct bsca_block *sca = vcpu->kvm->arch.sca;
2001
2002 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2003 sca->cpu[vcpu->vcpu_id].sda = 0;
2004 }
2005 read_unlock(&vcpu->kvm->arch.sca_lock);
2006 }
2007
2008 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2009 {
2010 if (!kvm_s390_use_sca_entries()) {
2011 struct bsca_block *sca = vcpu->kvm->arch.sca;
2012
2013 /* we still need the basic sca for the ipte control */
2014 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2015 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2016 }
2017 read_lock(&vcpu->kvm->arch.sca_lock);
2018 if (vcpu->kvm->arch.use_esca) {
2019 struct esca_block *sca = vcpu->kvm->arch.sca;
2020
2021 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2022 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2023 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2024 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2025 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2026 } else {
2027 struct bsca_block *sca = vcpu->kvm->arch.sca;
2028
2029 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2030 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2031 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2032 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2033 }
2034 read_unlock(&vcpu->kvm->arch.sca_lock);
2035 }
2036
2037 /* Basic SCA to Extended SCA data copy routines */
2038 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2039 {
2040 d->sda = s->sda;
2041 d->sigp_ctrl.c = s->sigp_ctrl.c;
2042 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2043 }
2044
2045 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2046 {
2047 int i;
2048
2049 d->ipte_control = s->ipte_control;
2050 d->mcn[0] = s->mcn;
2051 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2052 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2053 }
2054
2055 static int sca_switch_to_extended(struct kvm *kvm)
2056 {
2057 struct bsca_block *old_sca = kvm->arch.sca;
2058 struct esca_block *new_sca;
2059 struct kvm_vcpu *vcpu;
2060 unsigned int vcpu_idx;
2061 u32 scaol, scaoh;
2062
2063 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2064 if (!new_sca)
2065 return -ENOMEM;
2066
2067 scaoh = (u32)((u64)(new_sca) >> 32);
2068 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2069
2070 kvm_s390_vcpu_block_all(kvm);
2071 write_lock(&kvm->arch.sca_lock);
2072
2073 sca_copy_b_to_e(new_sca, old_sca);
2074
2075 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2076 vcpu->arch.sie_block->scaoh = scaoh;
2077 vcpu->arch.sie_block->scaol = scaol;
2078 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2079 }
2080 kvm->arch.sca = new_sca;
2081 kvm->arch.use_esca = 1;
2082
2083 write_unlock(&kvm->arch.sca_lock);
2084 kvm_s390_vcpu_unblock_all(kvm);
2085
2086 free_page((unsigned long)old_sca);
2087
2088 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2089 old_sca, kvm->arch.sca);
2090 return 0;
2091 }
2092
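/*
 * Overview of the SCA handling: a VM starts with a basic SCA
 * (bsca_block, KVM_S390_BSCA_CPU_SLOTS entries).  sca_can_add_vcpu()
 * below triggers sca_switch_to_extended() once a VCPU id beyond the
 * basic slots is requested and the machine offers ESCA together with
 * the 64-bit SCA origin (sclp.has_esca && sclp.has_64bscao).  The
 * switch blocks all VCPUs, copies the entries, updates scaoh/scaol and
 * ECB2_ESCA in every SIE block and frees the old basic SCA page.
 */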
2093 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2094 {
2095 int rc;
2096
2097 if (!kvm_s390_use_sca_entries()) {
2098 if (id < KVM_MAX_VCPUS)
2099 return true;
2100 return false;
2101 }
2102 if (id < KVM_S390_BSCA_CPU_SLOTS)
2103 return true;
2104 if (!sclp.has_esca || !sclp.has_64bscao)
2105 return false;
2106
2107 mutex_lock(&kvm->lock);
2108 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2109 mutex_unlock(&kvm->lock);
2110
2111 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2112 }
2113
2114 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2115 {
2116 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2117 kvm_clear_async_pf_completion_queue(vcpu);
2118 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2119 KVM_SYNC_GPRS |
2120 KVM_SYNC_ACRS |
2121 KVM_SYNC_CRS |
2122 KVM_SYNC_ARCH0 |
2123 KVM_SYNC_PFAULT;
2124 kvm_s390_set_prefix(vcpu, 0);
2125 if (test_kvm_facility(vcpu->kvm, 64))
2126 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2127 if (test_kvm_facility(vcpu->kvm, 133))
2128 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2129 /* fprs can be synchronized via vrs, even if the guest has no vx. With
2130 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2131 */
2132 if (MACHINE_HAS_VX)
2133 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2134 else
2135 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2136
2137 if (kvm_is_ucontrol(vcpu->kvm))
2138 return __kvm_ucontrol_vcpu_init(vcpu);
2139
2140 return 0;
2141 }
2142
2143 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2144 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2145 {
2146 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2147 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2148 vcpu->arch.cputm_start = get_tod_clock_fast();
2149 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2150 }
2151
2152 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2153 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2154 {
2155 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2156 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2157 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2158 vcpu->arch.cputm_start = 0;
2159 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2160 }
2161
2162 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2163 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2164 {
2165 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2166 vcpu->arch.cputm_enabled = true;
2167 __start_cpu_timer_accounting(vcpu);
2168 }
2169
2170 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2171 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2172 {
2173 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2174 __stop_cpu_timer_accounting(vcpu);
2175 vcpu->arch.cputm_enabled = false;
2176 }
2177
2178 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2179 {
2180 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2181 __enable_cpu_timer_accounting(vcpu);
2182 preempt_enable();
2183 }
2184
2185 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2186 {
2187 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2188 __disable_cpu_timer_accounting(vcpu);
2189 preempt_enable();
2190 }
2191
2192 /* set the cpu timer - may only be called from the VCPU thread itself */
2193 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2194 {
2195 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2196 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2197 if (vcpu->arch.cputm_enabled)
2198 vcpu->arch.cputm_start = get_tod_clock_fast();
2199 vcpu->arch.sie_block->cputm = cputm;
2200 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2201 preempt_enable();
2202 }
2203
2204 /* update and get the cpu timer - can also be called from other VCPU threads */
2205 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2206 {
2207 unsigned int seq;
2208 __u64 value;
2209
2210 if (unlikely(!vcpu->arch.cputm_enabled))
2211 return vcpu->arch.sie_block->cputm;
2212
2213 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2214 do {
2215 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2216 /*
2217 * If the writer would ever execute a read in the critical
2218 * section, e.g. in irq context, we have a deadlock.
2219 */
2220 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2221 value = vcpu->arch.sie_block->cputm;
2222 /* if cputm_start is 0, accounting is being started/stopped */
2223 if (likely(vcpu->arch.cputm_start))
2224 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2225 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2226 preempt_enable();
2227 return value;
2228 }
2229
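/*
 * The CPU timer accounting above is protected by a seqcount so that
 * other threads can read a consistent value without locking.  While
 * accounting is running, the effective guest CPU timer is
 *
 *	cputm - (get_tod_clock_fast() - cputm_start)
 *
 * i.e. the stored value minus the TOD delta accumulated since the
 * accounting was (re)started; kvm_s390_get_cpu_timer() retries the
 * calculation if it races with a writer.
 */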
2230 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2231 {
2232
2233 gmap_enable(vcpu->arch.enabled_gmap);
2234 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2235 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2236 __start_cpu_timer_accounting(vcpu);
2237 vcpu->cpu = cpu;
2238 }
2239
2240 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2241 {
2242 vcpu->cpu = -1;
2243 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2244 __stop_cpu_timer_accounting(vcpu);
2245 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2246 vcpu->arch.enabled_gmap = gmap_get_enabled();
2247 gmap_disable(vcpu->arch.enabled_gmap);
2248
2249 }
2250
2251 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2252 {
2253 /* this equals initial cpu reset in pop, but we don't switch to ESA */
2254 vcpu->arch.sie_block->gpsw.mask = 0UL;
2255 vcpu->arch.sie_block->gpsw.addr = 0UL;
2256 kvm_s390_set_prefix(vcpu, 0);
2257 kvm_s390_set_cpu_timer(vcpu, 0);
2258 vcpu->arch.sie_block->ckc = 0UL;
2259 vcpu->arch.sie_block->todpr = 0;
2260 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2261 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
2262 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2263 /* make sure the new fpc will be lazily loaded */
2264 save_fpu_regs();
2265 current->thread.fpu.fpc = 0;
2266 vcpu->arch.sie_block->gbea = 1;
2267 vcpu->arch.sie_block->pp = 0;
2268 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2269 kvm_clear_async_pf_completion_queue(vcpu);
2270 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2271 kvm_s390_vcpu_stop(vcpu);
2272 kvm_s390_clear_local_irqs(vcpu);
2273 }
2274
2275 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2276 {
2277 mutex_lock(&vcpu->kvm->lock);
2278 preempt_disable();
2279 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2280 preempt_enable();
2281 mutex_unlock(&vcpu->kvm->lock);
2282 if (!kvm_is_ucontrol(vcpu->kvm)) {
2283 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2284 sca_add_vcpu(vcpu);
2285 }
2286 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2287 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2288 /* make vcpu_load load the right gmap on the first trigger */
2289 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2290 }
2291
2292 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2293 {
2294 if (!test_kvm_facility(vcpu->kvm, 76))
2295 return;
2296
2297 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2298
2299 if (vcpu->kvm->arch.crypto.aes_kw)
2300 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2301 if (vcpu->kvm->arch.crypto.dea_kw)
2302 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2303
2304 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2305 }
2306
2307 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2308 {
2309 free_page(vcpu->arch.sie_block->cbrlo);
2310 vcpu->arch.sie_block->cbrlo = 0;
2311 }
2312
2313 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2314 {
2315 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2316 if (!vcpu->arch.sie_block->cbrlo)
2317 return -ENOMEM;
2318
2319 vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2320 return 0;
2321 }
2322
2323 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2324 {
2325 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2326
2327 vcpu->arch.sie_block->ibc = model->ibc;
2328 if (test_kvm_facility(vcpu->kvm, 7))
2329 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2330 }
2331
2332 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2333 {
2334 int rc = 0;
2335
2336 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2337 CPUSTAT_SM |
2338 CPUSTAT_STOPPED);
2339
2340 if (test_kvm_facility(vcpu->kvm, 78))
2341 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2342 else if (test_kvm_facility(vcpu->kvm, 8))
2343 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2344
2345 kvm_s390_vcpu_setup_model(vcpu);
2346
2347 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2348 if (MACHINE_HAS_ESOP)
2349 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2350 if (test_kvm_facility(vcpu->kvm, 9))
2351 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2352 if (test_kvm_facility(vcpu->kvm, 73))
2353 vcpu->arch.sie_block->ecb |= ECB_TE;
2354
2355 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2356 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2357 if (test_kvm_facility(vcpu->kvm, 130))
2358 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2359 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2360 if (sclp.has_cei)
2361 vcpu->arch.sie_block->eca |= ECA_CEI;
2362 if (sclp.has_ib)
2363 vcpu->arch.sie_block->eca |= ECA_IB;
2364 if (sclp.has_siif)
2365 vcpu->arch.sie_block->eca |= ECA_SII;
2366 if (sclp.has_sigpif)
2367 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2368 if (test_kvm_facility(vcpu->kvm, 129)) {
2369 vcpu->arch.sie_block->eca |= ECA_VX;
2370 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2371 }
2372 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2373 | SDNXC;
2374 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2375
2376 if (sclp.has_kss)
2377 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2378 else
2379 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2380
2381 if (vcpu->kvm->arch.use_cmma) {
2382 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2383 if (rc)
2384 return rc;
2385 }
2386 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2387 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2388
2389 kvm_s390_vcpu_crypto_setup(vcpu);
2390
2391 return rc;
2392 }
2393
2394 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2395 unsigned int id)
2396 {
2397 struct kvm_vcpu *vcpu;
2398 struct sie_page *sie_page;
2399 int rc = -EINVAL;
2400
2401 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2402 goto out;
2403
2404 rc = -ENOMEM;
2405
2406 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2407 if (!vcpu)
2408 goto out;
2409
2410 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2411 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2412 if (!sie_page)
2413 goto out_free_cpu;
2414
2415 vcpu->arch.sie_block = &sie_page->sie_block;
2416 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2417
2418 /* the real guest size will always be smaller than msl */
2419 vcpu->arch.sie_block->mso = 0;
2420 vcpu->arch.sie_block->msl = sclp.hamax;
2421
2422 vcpu->arch.sie_block->icpua = id;
2423 spin_lock_init(&vcpu->arch.local_int.lock);
2424 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2425 vcpu->arch.local_int.wq = &vcpu->wq;
2426 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2427 seqcount_init(&vcpu->arch.cputm_seqcount);
2428
2429 rc = kvm_vcpu_init(vcpu, kvm, id);
2430 if (rc)
2431 goto out_free_sie_block;
2432 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2433 vcpu->arch.sie_block);
2434 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2435
2436 return vcpu;
2437 out_free_sie_block:
2438 free_page((unsigned long)(vcpu->arch.sie_block));
2439 out_free_cpu:
2440 kmem_cache_free(kvm_vcpu_cache, vcpu);
2441 out:
2442 return ERR_PTR(rc);
2443 }
2444
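/*
 * Each VCPU owns exactly one sie_page (enforced by the BUILD_BUG_ON
 * above), which contains its SIE control block and the itdb.  mso/msl
 * span the maximum possible guest; the real guest size is further
 * bounded by the memslots and kvm->arch.mem_limit.
 */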
2445 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2446 {
2447 return kvm_s390_vcpu_has_irq(vcpu, 0);
2448 }
2449
2450 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2451 {
2452 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2453 exit_sie(vcpu);
2454 }
2455
2456 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2457 {
2458 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2459 }
2460
2461 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2462 {
2463 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2464 exit_sie(vcpu);
2465 }
2466
2467 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2468 {
2469 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2470 }
2471
2472 /*
2473 * Kick a guest cpu out of SIE and wait until SIE is not running.
2474 * If the CPU is not running (e.g. waiting as idle) the function will
2475 * return immediately. */
2476 void exit_sie(struct kvm_vcpu *vcpu)
2477 {
2478 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2479 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2480 cpu_relax();
2481 }
2482
2483 /* Kick a guest cpu out of SIE to process a request synchronously */
2484 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2485 {
2486 kvm_make_request(req, vcpu);
2487 kvm_s390_vcpu_request(vcpu);
2488 }
2489
2490 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2491 unsigned long end)
2492 {
2493 struct kvm *kvm = gmap->private;
2494 struct kvm_vcpu *vcpu;
2495 unsigned long prefix;
2496 int i;
2497
2498 if (gmap_is_shadow(gmap))
2499 return;
2500 if (start >= 1UL << 31)
2501 /* We are only interested in prefix pages */
2502 return;
2503 kvm_for_each_vcpu(i, vcpu, kvm) {
2504 /* match against both prefix pages */
2505 prefix = kvm_s390_get_prefix(vcpu);
2506 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2507 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2508 start, end);
2509 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2510 }
2511 }
2512 }
2513
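/*
 * The gmap notifier above only acts on the two pages that form a VCPU's
 * prefix area.  When the host mapping of a prefix page changes, the
 * affected VCPU gets a KVM_REQ_MMU_RELOAD, and kvm_s390_handle_requests()
 * re-arms the ipte notifier by write-protecting the prefix pages again.
 */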
2514 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2515 {
2516 /* kvm common code refers to this, but never calls it */
2517 BUG();
2518 return 0;
2519 }
2520
2521 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2522 struct kvm_one_reg *reg)
2523 {
2524 int r = -EINVAL;
2525
2526 switch (reg->id) {
2527 case KVM_REG_S390_TODPR:
2528 r = put_user(vcpu->arch.sie_block->todpr,
2529 (u32 __user *)reg->addr);
2530 break;
2531 case KVM_REG_S390_EPOCHDIFF:
2532 r = put_user(vcpu->arch.sie_block->epoch,
2533 (u64 __user *)reg->addr);
2534 break;
2535 case KVM_REG_S390_CPU_TIMER:
2536 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2537 (u64 __user *)reg->addr);
2538 break;
2539 case KVM_REG_S390_CLOCK_COMP:
2540 r = put_user(vcpu->arch.sie_block->ckc,
2541 (u64 __user *)reg->addr);
2542 break;
2543 case KVM_REG_S390_PFTOKEN:
2544 r = put_user(vcpu->arch.pfault_token,
2545 (u64 __user *)reg->addr);
2546 break;
2547 case KVM_REG_S390_PFCOMPARE:
2548 r = put_user(vcpu->arch.pfault_compare,
2549 (u64 __user *)reg->addr);
2550 break;
2551 case KVM_REG_S390_PFSELECT:
2552 r = put_user(vcpu->arch.pfault_select,
2553 (u64 __user *)reg->addr);
2554 break;
2555 case KVM_REG_S390_PP:
2556 r = put_user(vcpu->arch.sie_block->pp,
2557 (u64 __user *)reg->addr);
2558 break;
2559 case KVM_REG_S390_GBEA:
2560 r = put_user(vcpu->arch.sie_block->gbea,
2561 (u64 __user *)reg->addr);
2562 break;
2563 default:
2564 break;
2565 }
2566
2567 return r;
2568 }
2569
2570 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2571 struct kvm_one_reg *reg)
2572 {
2573 int r = -EINVAL;
2574 __u64 val;
2575
2576 switch (reg->id) {
2577 case KVM_REG_S390_TODPR:
2578 r = get_user(vcpu->arch.sie_block->todpr,
2579 (u32 __user *)reg->addr);
2580 break;
2581 case KVM_REG_S390_EPOCHDIFF:
2582 r = get_user(vcpu->arch.sie_block->epoch,
2583 (u64 __user *)reg->addr);
2584 break;
2585 case KVM_REG_S390_CPU_TIMER:
2586 r = get_user(val, (u64 __user *)reg->addr);
2587 if (!r)
2588 kvm_s390_set_cpu_timer(vcpu, val);
2589 break;
2590 case KVM_REG_S390_CLOCK_COMP:
2591 r = get_user(vcpu->arch.sie_block->ckc,
2592 (u64 __user *)reg->addr);
2593 break;
2594 case KVM_REG_S390_PFTOKEN:
2595 r = get_user(vcpu->arch.pfault_token,
2596 (u64 __user *)reg->addr);
2597 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2598 kvm_clear_async_pf_completion_queue(vcpu);
2599 break;
2600 case KVM_REG_S390_PFCOMPARE:
2601 r = get_user(vcpu->arch.pfault_compare,
2602 (u64 __user *)reg->addr);
2603 break;
2604 case KVM_REG_S390_PFSELECT:
2605 r = get_user(vcpu->arch.pfault_select,
2606 (u64 __user *)reg->addr);
2607 break;
2608 case KVM_REG_S390_PP:
2609 r = get_user(vcpu->arch.sie_block->pp,
2610 (u64 __user *)reg->addr);
2611 break;
2612 case KVM_REG_S390_GBEA:
2613 r = get_user(vcpu->arch.sie_block->gbea,
2614 (u64 __user *)reg->addr);
2615 break;
2616 default:
2617 break;
2618 }
2619
2620 return r;
2621 }
2622
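/*
 * Example (a sketch, error handling omitted): userspace can read the
 * guest CPU timer through the ONE_REG interface handled above, given a
 * vcpu fd from KVM_CREATE_VCPU:
 *
 *	__u64 cputm;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64) &cputm,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */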
2623 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2624 {
2625 kvm_s390_vcpu_initial_reset(vcpu);
2626 return 0;
2627 }
2628
2629 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2630 {
2631 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2632 return 0;
2633 }
2634
2635 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2636 {
2637 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2638 return 0;
2639 }
2640
2641 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2642 struct kvm_sregs *sregs)
2643 {
2644 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2645 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2646 return 0;
2647 }
2648
2649 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2650 struct kvm_sregs *sregs)
2651 {
2652 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2653 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2654 return 0;
2655 }
2656
2657 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2658 {
2659 if (test_fp_ctl(fpu->fpc))
2660 return -EINVAL;
2661 vcpu->run->s.regs.fpc = fpu->fpc;
2662 if (MACHINE_HAS_VX)
2663 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2664 (freg_t *) fpu->fprs);
2665 else
2666 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2667 return 0;
2668 }
2669
2670 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2671 {
2672 /* make sure we have the latest values */
2673 save_fpu_regs();
2674 if (MACHINE_HAS_VX)
2675 convert_vx_to_fp((freg_t *) fpu->fprs,
2676 (__vector128 *) vcpu->run->s.regs.vrs);
2677 else
2678 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2679 fpu->fpc = vcpu->run->s.regs.fpc;
2680 return 0;
2681 }
2682
2683 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2684 {
2685 int rc = 0;
2686
2687 if (!is_vcpu_stopped(vcpu))
2688 rc = -EBUSY;
2689 else {
2690 vcpu->run->psw_mask = psw.mask;
2691 vcpu->run->psw_addr = psw.addr;
2692 }
2693 return rc;
2694 }
2695
2696 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2697 struct kvm_translation *tr)
2698 {
2699 return -EINVAL; /* not implemented yet */
2700 }
2701
2702 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2703 KVM_GUESTDBG_USE_HW_BP | \
2704 KVM_GUESTDBG_ENABLE)
2705
2706 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2707 struct kvm_guest_debug *dbg)
2708 {
2709 int rc = 0;
2710
2711 vcpu->guest_debug = 0;
2712 kvm_s390_clear_bp_data(vcpu);
2713
2714 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2715 return -EINVAL;
2716 if (!sclp.has_gpere)
2717 return -EINVAL;
2718
2719 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2720 vcpu->guest_debug = dbg->control;
2721 /* enforce guest PER */
2722 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2723
2724 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2725 rc = kvm_s390_import_bp_data(vcpu, dbg);
2726 } else {
2727 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2728 vcpu->arch.guestdbg.last_bp = 0;
2729 }
2730
2731 if (rc) {
2732 vcpu->guest_debug = 0;
2733 kvm_s390_clear_bp_data(vcpu);
2734 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2735 }
2736
2737 return rc;
2738 }
2739
2740 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2741 struct kvm_mp_state *mp_state)
2742 {
2743 /* CHECK_STOP and LOAD are not supported yet */
2744 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2745 KVM_MP_STATE_OPERATING;
2746 }
2747
2748 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2749 struct kvm_mp_state *mp_state)
2750 {
2751 int rc = 0;
2752
2753 /* user space knows about this interface - let it control the state */
2754 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2755
2756 switch (mp_state->mp_state) {
2757 case KVM_MP_STATE_STOPPED:
2758 kvm_s390_vcpu_stop(vcpu);
2759 break;
2760 case KVM_MP_STATE_OPERATING:
2761 kvm_s390_vcpu_start(vcpu);
2762 break;
2763 case KVM_MP_STATE_LOAD:
2764 case KVM_MP_STATE_CHECK_STOP:
2765 /* fall through - CHECK_STOP and LOAD are not supported yet */
2766 default:
2767 rc = -ENXIO;
2768 }
2769
2770 return rc;
2771 }
2772
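/*
 * Note on the MP state handlers above: the first KVM_SET_MP_STATE call
 * sets user_cpu_state_ctrl, after which the run ioctl no longer starts
 * stopped VCPUs automatically - userspace is then expected to manage
 * the STOPPED/OPERATING transitions itself.
 */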
2773 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2774 {
2775 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2776 }
2777
2778 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2779 {
2780 retry:
2781 kvm_s390_vcpu_request_handled(vcpu);
2782 if (!kvm_request_pending(vcpu))
2783 return 0;
2784 /*
2785 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2786 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2787 * This ensures that the ipte instruction for this request has
2788 * already finished. We might race against a second unmapper that
2789 * wants to set the blocking bit. Let's just retry the request loop.
2790 */
2791 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2792 int rc;
2793 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2794 kvm_s390_get_prefix(vcpu),
2795 PAGE_SIZE * 2, PROT_WRITE);
2796 if (rc) {
2797 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2798 return rc;
2799 }
2800 goto retry;
2801 }
2802
2803 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2804 vcpu->arch.sie_block->ihcpu = 0xffff;
2805 goto retry;
2806 }
2807
2808 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2809 if (!ibs_enabled(vcpu)) {
2810 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2811 atomic_or(CPUSTAT_IBS,
2812 &vcpu->arch.sie_block->cpuflags);
2813 }
2814 goto retry;
2815 }
2816
2817 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2818 if (ibs_enabled(vcpu)) {
2819 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2820 atomic_andnot(CPUSTAT_IBS,
2821 &vcpu->arch.sie_block->cpuflags);
2822 }
2823 goto retry;
2824 }
2825
2826 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2827 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2828 goto retry;
2829 }
2830
2831 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2832 /*
2833 * Disable CMMA virtualization; we will emulate the ESSA
2834 * instruction manually, in order to provide additional
2835 * functionalities needed for live migration.
2836 */
2837 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2838 goto retry;
2839 }
2840
2841 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2842 /*
2843 * Re-enable CMMA virtualization if CMMA is available and
2844 * was used.
2845 */
2846 if ((vcpu->kvm->arch.use_cmma) &&
2847 (vcpu->kvm->mm->context.use_cmma))
2848 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2849 goto retry;
2850 }
2851
2852 /* nothing to do, just clear the request */
2853 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2854
2855 return 0;
2856 }
2857
2858 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2859 {
2860 struct kvm_vcpu *vcpu;
2861 int i;
2862
2863 mutex_lock(&kvm->lock);
2864 preempt_disable();
2865 kvm->arch.epoch = tod - get_tod_clock();
2866 kvm_s390_vcpu_block_all(kvm);
2867 kvm_for_each_vcpu(i, vcpu, kvm)
2868 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2869 kvm_s390_vcpu_unblock_all(kvm);
2870 preempt_enable();
2871 mutex_unlock(&kvm->lock);
2872 }
2873
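/*
 * kvm_s390_set_tod_clock() stores the difference between the requested
 * guest TOD and the current host TOD in kvm->arch.epoch.  The SIE adds
 * this epoch to the host clock, so the guest observes
 *
 *	guest_tod = host_tod + kvm->arch.epoch
 *
 * which is why a single delta is written to all SIE blocks while the
 * VCPUs are blocked.
 */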
2874 /**
2875 * kvm_arch_fault_in_page - fault-in guest page if necessary
2876 * @vcpu: The corresponding virtual cpu
2877 * @gpa: Guest physical address
2878 * @writable: Whether the page should be writable or not
2879 *
2880 * Make sure that a guest page has been faulted-in on the host.
2881 *
2882 * Return: Zero on success, negative error code otherwise.
2883 */
2884 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2885 {
2886 return gmap_fault(vcpu->arch.gmap, gpa,
2887 writable ? FAULT_FLAG_WRITE : 0);
2888 }
2889
2890 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2891 unsigned long token)
2892 {
2893 struct kvm_s390_interrupt inti;
2894 struct kvm_s390_irq irq;
2895
2896 if (start_token) {
2897 irq.u.ext.ext_params2 = token;
2898 irq.type = KVM_S390_INT_PFAULT_INIT;
2899 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2900 } else {
2901 inti.type = KVM_S390_INT_PFAULT_DONE;
2902 inti.parm64 = token;
2903 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2904 }
2905 }
2906
2907 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2908 struct kvm_async_pf *work)
2909 {
2910 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2911 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2912 }
2913
2914 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2915 struct kvm_async_pf *work)
2916 {
2917 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2918 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2919 }
2920
2921 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2922 struct kvm_async_pf *work)
2923 {
2924 /* s390 will always inject the page directly */
2925 }
2926
2927 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2928 {
2929 /*
2930 * s390 will always inject the page directly,
2931 * but we still want kvm_check_async_pf_completion() to clean up
2932 */
2933 return true;
2934 }
2935
2936 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2937 {
2938 hva_t hva;
2939 struct kvm_arch_async_pf arch;
2940 int rc;
2941
2942 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2943 return 0;
2944 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2945 vcpu->arch.pfault_compare)
2946 return 0;
2947 if (psw_extint_disabled(vcpu))
2948 return 0;
2949 if (kvm_s390_vcpu_has_irq(vcpu, 0))
2950 return 0;
2951 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2952 return 0;
2953 if (!vcpu->arch.gmap->pfault_enabled)
2954 return 0;
2955
2956 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2957 hva += current->thread.gmap_addr & ~PAGE_MASK;
2958 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2959 return 0;
2960
2961 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2962 return rc;
2963 }
2964
2965 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2966 {
2967 int rc, cpuflags;
2968
2969 /*
2970 * On s390 notifications for arriving pages will be delivered directly
2971 * to the guest but the housekeeping for completed pfaults is
2972 * handled outside the worker.
2973 */
2974 kvm_check_async_pf_completion(vcpu);
2975
2976 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2977 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2978
2979 if (need_resched())
2980 schedule();
2981
2982 if (test_cpu_flag(CIF_MCCK_PENDING))
2983 s390_handle_mcck();
2984
2985 if (!kvm_is_ucontrol(vcpu->kvm)) {
2986 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2987 if (rc)
2988 return rc;
2989 }
2990
2991 rc = kvm_s390_handle_requests(vcpu);
2992 if (rc)
2993 return rc;
2994
2995 if (guestdbg_enabled(vcpu)) {
2996 kvm_s390_backup_guest_per_regs(vcpu);
2997 kvm_s390_patch_guest_per_regs(vcpu);
2998 }
2999
3000 vcpu->arch.sie_block->icptcode = 0;
3001 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3002 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3003 trace_kvm_s390_sie_enter(vcpu, cpuflags);
3004
3005 return 0;
3006 }
3007
3008 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3009 {
3010 struct kvm_s390_pgm_info pgm_info = {
3011 .code = PGM_ADDRESSING,
3012 };
3013 u8 opcode, ilen;
3014 int rc;
3015
3016 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3017 trace_kvm_s390_sie_fault(vcpu);
3018
3019 /*
3020 * We want to inject an addressing exception, which is defined as a
3021 * suppressing or terminating exception. However, since we came here
3022 * by a DAT access exception, the PSW still points to the faulting
3023 * instruction since DAT exceptions are nullifying. So we've got
3024 * to look up the current opcode to get the length of the instruction
3025 * to be able to forward the PSW.
3026 */
3027 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3028 ilen = insn_length(opcode);
3029 if (rc < 0) {
3030 return rc;
3031 } else if (rc) {
3032 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3033 * Forward by arbitrary ilc, injection will take care of
3034 * nullification if necessary.
3035 */
3036 pgm_info = vcpu->arch.pgm;
3037 ilen = 4;
3038 }
3039 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3040 kvm_s390_forward_psw(vcpu, ilen);
3041 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3042 }
3043
3044 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3045 {
3046 struct mcck_volatile_info *mcck_info;
3047 struct sie_page *sie_page;
3048
3049 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3050 vcpu->arch.sie_block->icptcode);
3051 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3052
3053 if (guestdbg_enabled(vcpu))
3054 kvm_s390_restore_guest_per_regs(vcpu);
3055
3056 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3057 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3058
3059 if (exit_reason == -EINTR) {
3060 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3061 sie_page = container_of(vcpu->arch.sie_block,
3062 struct sie_page, sie_block);
3063 mcck_info = &sie_page->mcck_info;
3064 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3065 return 0;
3066 }
3067
3068 if (vcpu->arch.sie_block->icptcode > 0) {
3069 int rc = kvm_handle_sie_intercept(vcpu);
3070
3071 if (rc != -EOPNOTSUPP)
3072 return rc;
3073 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3074 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3075 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3076 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3077 return -EREMOTE;
3078 } else if (exit_reason != -EFAULT) {
3079 vcpu->stat.exit_null++;
3080 return 0;
3081 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3082 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3083 vcpu->run->s390_ucontrol.trans_exc_code =
3084 current->thread.gmap_addr;
3085 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3086 return -EREMOTE;
3087 } else if (current->thread.gmap_pfault) {
3088 trace_kvm_s390_major_guest_pfault(vcpu);
3089 current->thread.gmap_pfault = 0;
3090 if (kvm_arch_setup_async_pf(vcpu))
3091 return 0;
3092 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3093 }
3094 return vcpu_post_run_fault_in_sie(vcpu);
3095 }
3096
3097 static int __vcpu_run(struct kvm_vcpu *vcpu)
3098 {
3099 int rc, exit_reason;
3100
3101 /*
3102 * We try to hold kvm->srcu during most of vcpu_run (except when
3103 * running the guest), so that memslots (and other stuff) are protected
3104 */
3105 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3106
3107 do {
3108 rc = vcpu_pre_run(vcpu);
3109 if (rc)
3110 break;
3111
3112 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3113 /*
3114 * As PF_VCPU will be used in the fault handler, no uaccess is allowed
3115 * between guest_enter and guest_exit.
3116 */
3117 local_irq_disable();
3118 guest_enter_irqoff();
3119 __disable_cpu_timer_accounting(vcpu);
3120 local_irq_enable();
3121 exit_reason = sie64a(vcpu->arch.sie_block,
3122 vcpu->run->s.regs.gprs);
3123 local_irq_disable();
3124 __enable_cpu_timer_accounting(vcpu);
3125 guest_exit_irqoff();
3126 local_irq_enable();
3127 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3128
3129 rc = vcpu_post_run(vcpu, exit_reason);
3130 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3131
3132 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3133 return rc;
3134 }
3135
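/*
 * __vcpu_run() is the inner loop of the run ioctl: vcpu_pre_run()
 * delivers pending interrupts and requests, sie64a() enters SIE with
 * the guest gprs, and vcpu_post_run() turns the exit reason into an
 * in-kernel handled intercept, a userspace exit (-EREMOTE) or a fault
 * to be resolved.  The loop stops on signals, pending guest-debug
 * exits or errors.
 */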
3136 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3137 {
3138 struct runtime_instr_cb *riccb;
3139 struct gs_cb *gscb;
3140
3141 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3142 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3143 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3144 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3145 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3146 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3147 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3148 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3149 /* some control register changes require a tlb flush */
3150 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3151 }
3152 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3153 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3154 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3155 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3156 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3157 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3158 }
3159 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3160 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3161 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3162 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3163 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3164 kvm_clear_async_pf_completion_queue(vcpu);
3165 }
3166 /*
3167 * If userspace sets the riccb (e.g. after migration) to a valid state,
3168 * we should enable RI here instead of doing the lazy enablement.
3169 */
3170 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3171 test_kvm_facility(vcpu->kvm, 64) &&
3172 riccb->valid &&
3173 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3174 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3175 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3176 }
3177 /*
3178 * If userspace sets the gscb (e.g. after migration) to non-zero,
3179 * we should enable GS here instead of doing the lazy enablement.
3180 */
3181 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3182 test_kvm_facility(vcpu->kvm, 133) &&
3183 gscb->gssm &&
3184 !vcpu->arch.gs_enabled) {
3185 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3186 vcpu->arch.sie_block->ecb |= ECB_GS;
3187 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3188 vcpu->arch.gs_enabled = 1;
3189 }
3190 save_access_regs(vcpu->arch.host_acrs);
3191 restore_access_regs(vcpu->run->s.regs.acrs);
3192 /* save host (userspace) fprs/vrs */
3193 save_fpu_regs();
3194 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3195 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3196 if (MACHINE_HAS_VX)
3197 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3198 else
3199 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3200 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3201 if (test_fp_ctl(current->thread.fpu.fpc))
3202 /* User space provided an invalid FPC, let's clear it */
3203 current->thread.fpu.fpc = 0;
3204 if (MACHINE_HAS_GS) {
3205 preempt_disable();
3206 __ctl_set_bit(2, 4);
3207 if (current->thread.gs_cb) {
3208 vcpu->arch.host_gscb = current->thread.gs_cb;
3209 save_gs_cb(vcpu->arch.host_gscb);
3210 }
3211 if (vcpu->arch.gs_enabled) {
3212 current->thread.gs_cb = (struct gs_cb *)
3213 &vcpu->run->s.regs.gscb;
3214 restore_gs_cb(current->thread.gs_cb);
3215 }
3216 preempt_enable();
3217 }
3218
3219 kvm_run->kvm_dirty_regs = 0;
3220 }
3221
3222 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3223 {
3224 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3225 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3226 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3227 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3228 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3229 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3230 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3231 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3232 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3233 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3234 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3235 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3236 save_access_regs(vcpu->run->s.regs.acrs);
3237 restore_access_regs(vcpu->arch.host_acrs);
3238 /* Save guest register state */
3239 save_fpu_regs();
3240 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3241 /* Restore will be done lazily at return to userspace */
3242 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3243 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3244 if (MACHINE_HAS_GS) {
3245 __ctl_set_bit(2, 4);
3246 if (vcpu->arch.gs_enabled)
3247 save_gs_cb(current->thread.gs_cb);
3248 preempt_disable();
3249 current->thread.gs_cb = vcpu->arch.host_gscb;
3250 restore_gs_cb(vcpu->arch.host_gscb);
3251 preempt_enable();
3252 if (!vcpu->arch.host_gscb)
3253 __ctl_clear_bit(2, 4);
3254 vcpu->arch.host_gscb = NULL;
3255 }
3256
3257 }
3258
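/*
 * sync_regs() and store_regs() bracket every KVM_RUN: the former moves
 * register state that userspace marked dirty in kvm_run into the SIE
 * block and the host thread (including the lazy fprs/vrs and guarded
 * storage switch), the latter copies it back before returning.  This
 * is also why kvm_s390_vcpu_store_status() below has to call
 * save_fpu_regs() first - the guest fprs/acrs may still live in the
 * host registers.
 */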
3259 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3260 {
3261 int rc;
3262 sigset_t sigsaved;
3263
3264 if (kvm_run->immediate_exit)
3265 return -EINTR;
3266
3267 if (guestdbg_exit_pending(vcpu)) {
3268 kvm_s390_prepare_debug_exit(vcpu);
3269 return 0;
3270 }
3271
3272 if (vcpu->sigset_active)
3273 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
3274
3275 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3276 kvm_s390_vcpu_start(vcpu);
3277 } else if (is_vcpu_stopped(vcpu)) {
3278 pr_err_ratelimited("can't run stopped vcpu %d\n",
3279 vcpu->vcpu_id);
3280 return -EINVAL;
3281 }
3282
3283 sync_regs(vcpu, kvm_run);
3284 enable_cpu_timer_accounting(vcpu);
3285
3286 might_fault();
3287 rc = __vcpu_run(vcpu);
3288
3289 if (signal_pending(current) && !rc) {
3290 kvm_run->exit_reason = KVM_EXIT_INTR;
3291 rc = -EINTR;
3292 }
3293
3294 if (guestdbg_exit_pending(vcpu) && !rc) {
3295 kvm_s390_prepare_debug_exit(vcpu);
3296 rc = 0;
3297 }
3298
3299 if (rc == -EREMOTE) {
3300 /* userspace support is needed, kvm_run has been prepared */
3301 rc = 0;
3302 }
3303
3304 disable_cpu_timer_accounting(vcpu);
3305 store_regs(vcpu, kvm_run);
3306
3307 if (vcpu->sigset_active)
3308 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
3309
3310 vcpu->stat.exit_userspace++;
3311 return rc;
3312 }
3313
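/*
 * Typical userspace flow driving the run ioctl above (a sketch; error
 * handling omitted, mmap_size obtained via KVM_GET_VCPU_MMAP_SIZE and
 * the fds via KVM_CREATE_VM / KVM_CREATE_VCPU, handle_sieic() being a
 * hypothetical userspace helper):
 *
 *	struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 *	while (ioctl(vcpu_fd, KVM_RUN, 0) == 0) {
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			handle_sieic(run);
 *	}
 */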
3314 /*
3315 * store status at address
3316 * we have two special cases:
3317 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3318 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3319 */
3320 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3321 {
3322 unsigned char archmode = 1;
3323 freg_t fprs[NUM_FPRS];
3324 unsigned int px;
3325 u64 clkcomp, cputm;
3326 int rc;
3327
3328 px = kvm_s390_get_prefix(vcpu);
3329 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3330 if (write_guest_abs(vcpu, 163, &archmode, 1))
3331 return -EFAULT;
3332 gpa = 0;
3333 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3334 if (write_guest_real(vcpu, 163, &archmode, 1))
3335 return -EFAULT;
3336 gpa = px;
3337 } else
3338 gpa -= __LC_FPREGS_SAVE_AREA;
3339
3340 /* manually convert vector registers if necessary */
3341 if (MACHINE_HAS_VX) {
3342 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3343 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3344 fprs, 128);
3345 } else {
3346 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3347 vcpu->run->s.regs.fprs, 128);
3348 }
3349 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3350 vcpu->run->s.regs.gprs, 128);
3351 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3352 &vcpu->arch.sie_block->gpsw, 16);
3353 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3354 &px, 4);
3355 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3356 &vcpu->run->s.regs.fpc, 4);
3357 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3358 &vcpu->arch.sie_block->todpr, 4);
3359 cputm = kvm_s390_get_cpu_timer(vcpu);
3360 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3361 &cputm, 8);
3362 clkcomp = vcpu->arch.sie_block->ckc >> 8;
3363 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3364 &clkcomp, 8);
3365 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3366 &vcpu->run->s.regs.acrs, 64);
3367 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3368 &vcpu->arch.sie_block->gcr, 128);
3369 return rc ? -EFAULT : 0;
3370 }
3371
3372 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3373 {
3374 /*
3375 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3376 * switch in the run ioctl. Let's update our copies before we save
3377 * them into the save area.
3378 */
3379 save_fpu_regs();
3380 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3381 save_access_regs(vcpu->run->s.regs.acrs);
3382
3383 return kvm_s390_store_status_unloaded(vcpu, addr);
3384 }
3385
3386 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3387 {
3388 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3389 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3390 }
3391
3392 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3393 {
3394 unsigned int i;
3395 struct kvm_vcpu *vcpu;
3396
3397 kvm_for_each_vcpu(i, vcpu, kvm) {
3398 __disable_ibs_on_vcpu(vcpu);
3399 }
3400 }
3401
3402 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3403 {
3404 if (!sclp.has_ibs)
3405 return;
3406 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3407 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3408 }
3409
3410 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3411 {
3412 int i, online_vcpus, started_vcpus = 0;
3413
3414 if (!is_vcpu_stopped(vcpu))
3415 return;
3416
3417 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3418 /* Only one cpu at a time may enter/leave the STOPPED state. */
3419 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3420 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3421
3422 for (i = 0; i < online_vcpus; i++) {
3423 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3424 started_vcpus++;
3425 }
3426
3427 if (started_vcpus == 0) {
3428 /* we're the only active VCPU -> speed it up */
3429 __enable_ibs_on_vcpu(vcpu);
3430 } else if (started_vcpus == 1) {
3431 /*
3432 * As we are starting a second VCPU, we have to disable
3433 * the IBS facility on all VCPUs to remove potentially
3434 * outstanding ENABLE requests.
3435 */
3436 __disable_ibs_on_all_vcpus(vcpu->kvm);
3437 }
3438
3439 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3440 /*
3441 * Another VCPU might have used IBS while we were offline.
3442 * Let's play safe and flush the VCPU at startup.
3443 */
3444 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3445 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3446 return;
3447 }
3448
3449 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3450 {
3451 int i, online_vcpus, started_vcpus = 0;
3452 struct kvm_vcpu *started_vcpu = NULL;
3453
3454 if (is_vcpu_stopped(vcpu))
3455 return;
3456
3457 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3458 /* Only one cpu at a time may enter/leave the STOPPED state. */
3459 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3460 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3461
3462 /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3463 kvm_s390_clear_stop_irq(vcpu);
3464
3465 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3466 __disable_ibs_on_vcpu(vcpu);
3467
3468 for (i = 0; i < online_vcpus; i++) {
3469 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3470 started_vcpus++;
3471 started_vcpu = vcpu->kvm->vcpus[i];
3472 }
3473 }
3474
3475 if (started_vcpus == 1) {
3476 /*
3477 * As we only have one VCPU left, we want to enable the
3478 * IBS facility for that VCPU to speed it up.
3479 */
3480 __enable_ibs_on_vcpu(started_vcpu);
3481 }
3482
3483 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3484 return;
3485 }
3486
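/*
 * The start/stop handlers above keep IBS enabled only while exactly one
 * VCPU is running: starting a second VCPU disables it on all VCPUs, and
 * stopping down to a single running VCPU re-enables it for that one.
 */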
3487 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3488 struct kvm_enable_cap *cap)
3489 {
3490 int r;
3491
3492 if (cap->flags)
3493 return -EINVAL;
3494
3495 switch (cap->cap) {
3496 case KVM_CAP_S390_CSS_SUPPORT:
3497 if (!vcpu->kvm->arch.css_support) {
3498 vcpu->kvm->arch.css_support = 1;
3499 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3500 trace_kvm_s390_enable_css(vcpu->kvm);
3501 }
3502 r = 0;
3503 break;
3504 default:
3505 r = -EINVAL;
3506 break;
3507 }
3508 return r;
3509 }
3510
3511 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3512 struct kvm_s390_mem_op *mop)
3513 {
3514 void __user *uaddr = (void __user *)mop->buf;
3515 void *tmpbuf = NULL;
3516 int r, srcu_idx;
3517 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3518 | KVM_S390_MEMOP_F_CHECK_ONLY;
3519
3520 if (mop->flags & ~supported_flags)
3521 return -EINVAL;
3522
3523 if (mop->size > MEM_OP_MAX_SIZE)
3524 return -E2BIG;
3525
3526 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3527 tmpbuf = vmalloc(mop->size);
3528 if (!tmpbuf)
3529 return -ENOMEM;
3530 }
3531
3532 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3533
3534 switch (mop->op) {
3535 case KVM_S390_MEMOP_LOGICAL_READ:
3536 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3537 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3538 mop->size, GACC_FETCH);
3539 break;
3540 }
3541 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3542 if (r == 0) {
3543 if (copy_to_user(uaddr, tmpbuf, mop->size))
3544 r = -EFAULT;
3545 }
3546 break;
3547 case KVM_S390_MEMOP_LOGICAL_WRITE:
3548 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3549 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3550 mop->size, GACC_STORE);
3551 break;
3552 }
3553 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3554 r = -EFAULT;
3555 break;
3556 }
3557 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3558 break;
3559 default:
3560 r = -EINVAL;
3561 }
3562
3563 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3564
3565 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3566 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3567
3568 vfree(tmpbuf);
3569 return r;
3570 }
3571
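/*
 * Example (a sketch; guest_addr is whatever logical guest address the
 * caller wants to read, error handling omitted) of a read through the
 * mem_op handler above, issued on a vcpu fd:
 *
 *	__u8 buf[512];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = sizeof(buf),
 *		.buf   = (__u64) buf,
 *		.ar    = 0,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */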
3572 long kvm_arch_vcpu_ioctl(struct file *filp,
3573 unsigned int ioctl, unsigned long arg)
3574 {
3575 struct kvm_vcpu *vcpu = filp->private_data;
3576 void __user *argp = (void __user *)arg;
3577 int idx;
3578 long r;
3579
3580 switch (ioctl) {
3581 case KVM_S390_IRQ: {
3582 struct kvm_s390_irq s390irq;
3583
3584 r = -EFAULT;
3585 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3586 break;
3587 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3588 break;
3589 }
3590 case KVM_S390_INTERRUPT: {
3591 struct kvm_s390_interrupt s390int;
3592 struct kvm_s390_irq s390irq;
3593
3594 r = -EFAULT;
3595 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3596 break;
3597 if (s390int_to_s390irq(&s390int, &s390irq))
3598 return -EINVAL;
3599 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3600 break;
3601 }
3602 case KVM_S390_STORE_STATUS:
3603 idx = srcu_read_lock(&vcpu->kvm->srcu);
3604 r = kvm_s390_vcpu_store_status(vcpu, arg);
3605 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3606 break;
3607 case KVM_S390_SET_INITIAL_PSW: {
3608 psw_t psw;
3609
3610 r = -EFAULT;
3611 if (copy_from_user(&psw, argp, sizeof(psw)))
3612 break;
3613 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3614 break;
3615 }
3616 case KVM_S390_INITIAL_RESET:
3617 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3618 break;
3619 case KVM_SET_ONE_REG:
3620 case KVM_GET_ONE_REG: {
3621 struct kvm_one_reg reg;
3622 r = -EFAULT;
3623 if (copy_from_user(&reg, argp, sizeof(reg)))
3624 break;
3625 if (ioctl == KVM_SET_ONE_REG)
3626 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3627 else
3628 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3629 break;
3630 }
3631 #ifdef CONFIG_KVM_S390_UCONTROL
3632 case KVM_S390_UCAS_MAP: {
3633 struct kvm_s390_ucas_mapping ucasmap;
3634
3635 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3636 r = -EFAULT;
3637 break;
3638 }
3639
3640 if (!kvm_is_ucontrol(vcpu->kvm)) {
3641 r = -EINVAL;
3642 break;
3643 }
3644
3645 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3646 ucasmap.vcpu_addr, ucasmap.length);
3647 break;
3648 }
3649 case KVM_S390_UCAS_UNMAP: {
3650 struct kvm_s390_ucas_mapping ucasmap;
3651
3652 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3653 r = -EFAULT;
3654 break;
3655 }
3656
3657 if (!kvm_is_ucontrol(vcpu->kvm)) {
3658 r = -EINVAL;
3659 break;
3660 }
3661
3662 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3663 ucasmap.length);
3664 break;
3665 }
3666 #endif
3667 case KVM_S390_VCPU_FAULT: {
3668 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3669 break;
3670 }
3671 case KVM_ENABLE_CAP:
3672 {
3673 struct kvm_enable_cap cap;
3674 r = -EFAULT;
3675 if (copy_from_user(&cap, argp, sizeof(cap)))
3676 break;
3677 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3678 break;
3679 }
3680 case KVM_S390_MEM_OP: {
3681 struct kvm_s390_mem_op mem_op;
3682
3683 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3684 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3685 else
3686 r = -EFAULT;
3687 break;
3688 }
3689 case KVM_S390_SET_IRQ_STATE: {
3690 struct kvm_s390_irq_state irq_state;
3691
3692 r = -EFAULT;
3693 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3694 break;
3695 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3696 irq_state.len == 0 ||
3697 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3698 r = -EINVAL;
3699 break;
3700 }
3701 r = kvm_s390_set_irq_state(vcpu,
3702 (void __user *) irq_state.buf,
3703 irq_state.len);
3704 break;
3705 }
3706 case KVM_S390_GET_IRQ_STATE: {
3707 struct kvm_s390_irq_state irq_state;
3708
3709 r = -EFAULT;
3710 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3711 break;
3712 if (irq_state.len == 0) {
3713 r = -EINVAL;
3714 break;
3715 }
3716 r = kvm_s390_get_irq_state(vcpu,
3717 (__u8 __user *) irq_state.buf,
3718 irq_state.len);
3719 break;
3720 }
3721 default:
3722 r = -ENOTTY;
3723 }
3724 return r;
3725 }
3726
3727 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3728 {
3729 #ifdef CONFIG_KVM_S390_UCONTROL
3730 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3731 && (kvm_is_ucontrol(vcpu->kvm))) {
3732 vmf->page = virt_to_page(vcpu->arch.sie_block);
3733 get_page(vmf->page);
3734 return 0;
3735 }
3736 #endif
3737 return VM_FAULT_SIGBUS;
3738 }
3739
3740 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3741 unsigned long npages)
3742 {
3743 return 0;
3744 }
3745
3746 /* Section: memory related */
3747 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3748 struct kvm_memory_slot *memslot,
3749 const struct kvm_userspace_memory_region *mem,
3750 enum kvm_mr_change change)
3751 {
3752 /* A few sanity checks. Memory slots have to start and end on a segment
3753 boundary (1 MB). The memory in userland may be fragmented into various
3754 different vmas. It is okay to mmap() and munmap() memory in this slot
3755 after doing this call at any time */
3756
3757 if (mem->userspace_addr & 0xffffful)
3758 return -EINVAL;
3759
3760 if (mem->memory_size & 0xffffful)
3761 return -EINVAL;
3762
3763 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3764 return -EINVAL;
3765
3766 return 0;
3767 }
3768
3769 void kvm_arch_commit_memory_region(struct kvm *kvm,
3770 const struct kvm_userspace_memory_region *mem,
3771 const struct kvm_memory_slot *old,
3772 const struct kvm_memory_slot *new,
3773 enum kvm_mr_change change)
3774 {
3775 int rc;
3776
3777 /* If the basics of the memslot do not change, we do not want
3778 * to update the gmap. Every update causes several unnecessary
3779 * segment translation exceptions. This is usually handled just
3780 * fine by the normal fault handler + gmap, but it will also
3781 * cause faults on the prefix page of running guest CPUs.
3782 */
3783 if (old->userspace_addr == mem->userspace_addr &&
3784 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3785 old->npages * PAGE_SIZE == mem->memory_size)
3786 return;
3787
3788 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3789 mem->guest_phys_addr, mem->memory_size);
3790 if (rc)
3791 pr_warn("failed to commit memory region\n");
3792 return;
3793 }
3794
3795 static inline unsigned long nonhyp_mask(int i)
3796 {
3797 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3798
3799 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3800 }
3801
3802 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3803 {
3804 vcpu->valid_wakeup = false;
3805 }
3806
3807 static int __init kvm_s390_init(void)
3808 {
3809 int i;
3810
3811 if (!sclp.has_sief2) {
3812 pr_info("SIE not available\n");
3813 return -ENODEV;
3814 }
3815
3816 for (i = 0; i < 16; i++)
3817 kvm_s390_fac_list_mask[i] |=
3818 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3819
3820 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3821 }
3822
3823 static void __exit kvm_s390_exit(void)
3824 {
3825 kvm_exit();
3826 }
3827
3828 module_init(kvm_s390_init);
3829 module_exit(kvm_s390_exit);
3830
3831 /*
3832 * Enable autoloading of the kvm module.
3833 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3834 * since x86 takes a different approach.
3835 */
3836 #include <linux/miscdevice.h>
3837 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3838 MODULE_ALIAS("devname:kvm");