arch/s390/kvm/kvm-s390.c
1 /*
2 * hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008, 2009
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
15 */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include "kvm-s390.h"
47 #include "gaccess.h"
48
49 #define KMSG_COMPONENT "kvm-s390"
50 #undef pr_fmt
51 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
52
53 #define CREATE_TRACE_POINTS
54 #include "trace.h"
55 #include "trace-s390.h"
56
57 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
58 #define LOCAL_IRQS 32
59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
60 (KVM_MAX_VCPUS + LOCAL_IRQS))
61
62 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
63
64 struct kvm_stats_debugfs_item debugfs_entries[] = {
65 { "userspace_handled", VCPU_STAT(exit_userspace) },
66 { "exit_null", VCPU_STAT(exit_null) },
67 { "exit_validity", VCPU_STAT(exit_validity) },
68 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
69 { "exit_external_request", VCPU_STAT(exit_external_request) },
70 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
71 { "exit_instruction", VCPU_STAT(exit_instruction) },
72 { "exit_pei", VCPU_STAT(exit_pei) },
73 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
74 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
75 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
76 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
77 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
78 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
79 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
80 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
82 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
83 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
84 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
85 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
86 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
87 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
88 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
89 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
90 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
91 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
92 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
93 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
94 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
95 { "instruction_spx", VCPU_STAT(instruction_spx) },
96 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
97 { "instruction_stap", VCPU_STAT(instruction_stap) },
98 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
99 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
100 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
101 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
102 { "instruction_essa", VCPU_STAT(instruction_essa) },
103 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
104 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
105 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
106 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
107 { "instruction_sie", VCPU_STAT(instruction_sie) },
108 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
109 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
110 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
111 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
112 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
113 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
114 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
115 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
116 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
117 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
118 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
119 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
120 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
121 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
122 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
123 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
124 { "diagnose_10", VCPU_STAT(diagnose_10) },
125 { "diagnose_44", VCPU_STAT(diagnose_44) },
126 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
127 { "diagnose_258", VCPU_STAT(diagnose_258) },
128 { "diagnose_308", VCPU_STAT(diagnose_308) },
129 { "diagnose_500", VCPU_STAT(diagnose_500) },
130 { NULL }
131 };
132
133 /* allow nested virtualization in KVM (if enabled by user space) */
134 static int nested;
135 module_param(nested, int, S_IRUGO);
136 MODULE_PARM_DESC(nested, "Nested virtualization support");
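/*
 * Usage sketch (the exact invocation is an assumption, not taken from this
 * file): nested SIE has to be requested when the module is loaded, e.g.
 * "modprobe kvm nested=1", or "kvm.nested=1" on the kernel command line;
 * kvm_s390_cpu_feat_init() below only advertises the vSIE related CPU
 * features (SIEF2 etc.) when this parameter is set.
 */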
137
138 /* upper facilities limit for kvm */
139 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
140
141 unsigned long kvm_s390_fac_list_mask_size(void)
142 {
143 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
144 return ARRAY_SIZE(kvm_s390_fac_list_mask);
145 }
146
147 /* available cpu features supported by kvm */
148 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
149 /* available subfunctions indicated via query / "test bit" */
150 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
151
152 static struct gmap_notifier gmap_notifier;
153 static struct gmap_notifier vsie_gmap_notifier;
154 debug_info_t *kvm_s390_dbf;
155
156 /* Section: not file related */
157 int kvm_arch_hardware_enable(void)
158 {
159 /* every s390 is virtualization enabled ;-) */
160 return 0;
161 }
162
163 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
164 unsigned long end);
165
166 /*
167 * This callback is executed during stop_machine(). All CPUs are therefore
168 * temporarily stopped. In order not to change guest behavior, we have to
169 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
170 * so a CPU won't be stopped while calculating with the epoch.
171 */
172 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
173 void *v)
174 {
175 struct kvm *kvm;
176 struct kvm_vcpu *vcpu;
177 int i;
178 unsigned long long *delta = v;
179
180 list_for_each_entry(kvm, &vm_list, vm_list) {
181 kvm->arch.epoch -= *delta;
182 kvm_for_each_vcpu(i, vcpu, kvm) {
183 vcpu->arch.sie_block->epoch -= *delta;
184 if (vcpu->arch.cputm_enabled)
185 vcpu->arch.cputm_start += *delta;
186 if (vcpu->arch.vsie_block)
187 vcpu->arch.vsie_block->epoch -= *delta;
188 }
189 }
190 return NOTIFY_OK;
191 }
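/*
 * Worked example for the adjustment above: the guest TOD is the host TOD
 * plus the epoch in the SIE control block. If an STP sync check moves the
 * host TOD forward by delta, keeping the guest TOD unchanged requires
 *
 *	(host_tod + delta) + epoch' = host_tod + epoch
 *	epoch'                      = epoch - delta
 *
 * which is the "-= *delta" applied to kvm, the SIE blocks and the vSIE
 * blocks above. cputm_start moves in the opposite direction because it is
 * a TOD timestamp taken on the host and must follow the new clock value.
 */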
192
193 static struct notifier_block kvm_clock_notifier = {
194 .notifier_call = kvm_clock_sync,
195 };
196
197 int kvm_arch_hardware_setup(void)
198 {
199 gmap_notifier.notifier_call = kvm_gmap_notifier;
200 gmap_register_pte_notifier(&gmap_notifier);
201 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
202 gmap_register_pte_notifier(&vsie_gmap_notifier);
203 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
204 &kvm_clock_notifier);
205 return 0;
206 }
207
208 void kvm_arch_hardware_unsetup(void)
209 {
210 gmap_unregister_pte_notifier(&gmap_notifier);
211 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
212 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
213 &kvm_clock_notifier);
214 }
215
216 static void allow_cpu_feat(unsigned long nr)
217 {
218 set_bit_inv(nr, kvm_s390_available_cpu_feat);
219 }
220
221 static inline int plo_test_bit(unsigned char nr)
222 {
223 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
224 int cc;
225
226 asm volatile(
227 /* Parameter registers are ignored for "test bit" */
228 " plo 0,0,0,0(0)\n"
229 " ipm %0\n"
230 " srl %0,28\n"
231 : "=d" (cc)
232 : "d" (r0)
233 : "cc");
234 return cc == 0;
235 }
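/*
 * Illustration of the convention used above and in kvm_s390_cpu_feat_init():
 * setting 0x100 in general register 0 together with the function code turns
 * PERFORM LOCKED OPERATION into a "test bit" query, and condition code 0
 * means the function is installed (that is what plo_test_bit() returns).
 * The result is folded into the same MSB-first bitmap layout the CPACF
 * query masks use, e.g. function 0 ends up in bit 0x80 of byte 0 and
 * function 9 in bit 0x40 of byte 1:
 *
 *	plo[nr >> 3] |= 0x80 >> (nr & 7);
 */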
236
237 static void kvm_s390_cpu_feat_init(void)
238 {
239 int i;
240
241 for (i = 0; i < 256; ++i) {
242 if (plo_test_bit(i))
243 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
244 }
245
246 if (test_facility(28)) /* TOD-clock steering */
247 ptff(kvm_s390_available_subfunc.ptff,
248 sizeof(kvm_s390_available_subfunc.ptff),
249 PTFF_QAF);
250
251 if (test_facility(17)) { /* MSA */
252 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
253 kvm_s390_available_subfunc.kmac);
254 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
255 kvm_s390_available_subfunc.kmc);
256 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
257 kvm_s390_available_subfunc.km);
258 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
259 kvm_s390_available_subfunc.kimd);
260 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
261 kvm_s390_available_subfunc.klmd);
262 }
263 if (test_facility(76)) /* MSA3 */
264 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
265 kvm_s390_available_subfunc.pckmo);
266 if (test_facility(77)) { /* MSA4 */
267 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
268 kvm_s390_available_subfunc.kmctr);
269 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
270 kvm_s390_available_subfunc.kmf);
271 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
272 kvm_s390_available_subfunc.kmo);
273 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
274 kvm_s390_available_subfunc.pcc);
275 }
276 if (test_facility(57)) /* MSA5 */
277 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
278 kvm_s390_available_subfunc.ppno);
279
280 if (test_facility(146)) /* MSA8 */
281 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
282 kvm_s390_available_subfunc.kma);
283
284 if (MACHINE_HAS_ESOP)
285 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
286 /*
287 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
288 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
289 */
290 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
291 !test_facility(3) || !nested)
292 return;
293 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
294 if (sclp.has_64bscao)
295 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
296 if (sclp.has_siif)
297 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
298 if (sclp.has_gpere)
299 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
300 if (sclp.has_gsls)
301 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
302 if (sclp.has_ib)
303 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
304 if (sclp.has_cei)
305 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
306 if (sclp.has_ibs)
307 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
308 if (sclp.has_kss)
309 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
310 /*
311 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
312 * all skey handling functions read/set the skey from the PGSTE
313 * instead of the real storage key.
314 *
315 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
316 * pages to be detected as preserved although they are resident.
317 *
318 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
319 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
320 *
321 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
322 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
323 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
324 *
325 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
326 * cannot easily shadow the SCA because of the ipte lock.
327 */
328 }
329
330 int kvm_arch_init(void *opaque)
331 {
332 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
333 if (!kvm_s390_dbf)
334 return -ENOMEM;
335
336 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
337 debug_unregister(kvm_s390_dbf);
338 return -ENOMEM;
339 }
340
341 kvm_s390_cpu_feat_init();
342
343 /* Register floating interrupt controller interface. */
344 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
345 }
346
347 void kvm_arch_exit(void)
348 {
349 debug_unregister(kvm_s390_dbf);
350 }
351
352 /* Section: device related */
353 long kvm_arch_dev_ioctl(struct file *filp,
354 unsigned int ioctl, unsigned long arg)
355 {
356 if (ioctl == KVM_S390_ENABLE_SIE)
357 return s390_enable_sie();
358 return -EINVAL;
359 }
360
361 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
362 {
363 int r;
364
365 switch (ext) {
366 case KVM_CAP_S390_PSW:
367 case KVM_CAP_S390_GMAP:
368 case KVM_CAP_SYNC_MMU:
369 #ifdef CONFIG_KVM_S390_UCONTROL
370 case KVM_CAP_S390_UCONTROL:
371 #endif
372 case KVM_CAP_ASYNC_PF:
373 case KVM_CAP_SYNC_REGS:
374 case KVM_CAP_ONE_REG:
375 case KVM_CAP_ENABLE_CAP:
376 case KVM_CAP_S390_CSS_SUPPORT:
377 case KVM_CAP_IOEVENTFD:
378 case KVM_CAP_DEVICE_CTRL:
379 case KVM_CAP_ENABLE_CAP_VM:
380 case KVM_CAP_S390_IRQCHIP:
381 case KVM_CAP_VM_ATTRIBUTES:
382 case KVM_CAP_MP_STATE:
383 case KVM_CAP_IMMEDIATE_EXIT:
384 case KVM_CAP_S390_INJECT_IRQ:
385 case KVM_CAP_S390_USER_SIGP:
386 case KVM_CAP_S390_USER_STSI:
387 case KVM_CAP_S390_SKEYS:
388 case KVM_CAP_S390_IRQ_STATE:
389 case KVM_CAP_S390_USER_INSTR0:
390 case KVM_CAP_S390_CMMA_MIGRATION:
391 case KVM_CAP_S390_AIS:
392 r = 1;
393 break;
394 case KVM_CAP_S390_MEM_OP:
395 r = MEM_OP_MAX_SIZE;
396 break;
397 case KVM_CAP_NR_VCPUS:
398 case KVM_CAP_MAX_VCPUS:
399 r = KVM_S390_BSCA_CPU_SLOTS;
400 if (!kvm_s390_use_sca_entries())
401 r = KVM_MAX_VCPUS;
402 else if (sclp.has_esca && sclp.has_64bscao)
403 r = KVM_S390_ESCA_CPU_SLOTS;
404 break;
405 case KVM_CAP_NR_MEMSLOTS:
406 r = KVM_USER_MEM_SLOTS;
407 break;
408 case KVM_CAP_S390_COW:
409 r = MACHINE_HAS_ESOP;
410 break;
411 case KVM_CAP_S390_VECTOR_REGISTERS:
412 r = MACHINE_HAS_VX;
413 break;
414 case KVM_CAP_S390_RI:
415 r = test_facility(64);
416 break;
417 case KVM_CAP_S390_GS:
418 r = test_facility(133);
419 break;
420 default:
421 r = 0;
422 }
423 return r;
424 }
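/*
 * User space sketch (vm_fd is an assumption, obtained via KVM_CREATE_VM):
 * probing one of the capabilities handled above. A return value > 0 means
 * the capability is available; for KVM_CAP_S390_MEM_OP the value is the
 * maximum transfer size (MEM_OP_MAX_SIZE).
 *
 *	int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	if (max > 0)
 *		printf("KVM_S390_MEM_OP transfers up to %d bytes\n", max);
 */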
425
426 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
427 struct kvm_memory_slot *memslot)
428 {
429 gfn_t cur_gfn, last_gfn;
430 unsigned long address;
431 struct gmap *gmap = kvm->arch.gmap;
432
433 /* Loop over all guest pages */
434 last_gfn = memslot->base_gfn + memslot->npages;
435 	for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
436 address = gfn_to_hva_memslot(memslot, cur_gfn);
437
438 if (test_and_clear_guest_dirty(gmap->mm, address))
439 mark_page_dirty(kvm, cur_gfn);
440 if (fatal_signal_pending(current))
441 return;
442 cond_resched();
443 }
444 }
445
446 /* Section: vm related */
447 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
448
449 /*
450 * Get (and clear) the dirty memory log for a memory slot.
451 */
452 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
453 struct kvm_dirty_log *log)
454 {
455 int r;
456 unsigned long n;
457 struct kvm_memslots *slots;
458 struct kvm_memory_slot *memslot;
459 int is_dirty = 0;
460
461 if (kvm_is_ucontrol(kvm))
462 return -EINVAL;
463
464 mutex_lock(&kvm->slots_lock);
465
466 r = -EINVAL;
467 if (log->slot >= KVM_USER_MEM_SLOTS)
468 goto out;
469
470 slots = kvm_memslots(kvm);
471 memslot = id_to_memslot(slots, log->slot);
472 r = -ENOENT;
473 if (!memslot->dirty_bitmap)
474 goto out;
475
476 kvm_s390_sync_dirty_log(kvm, memslot);
477 r = kvm_get_dirty_log(kvm, log, &is_dirty);
478 if (r)
479 goto out;
480
481 /* Clear the dirty log */
482 if (is_dirty) {
483 n = kvm_dirty_bitmap_bytes(memslot);
484 memset(memslot->dirty_bitmap, 0, n);
485 }
486 r = 0;
487 out:
488 mutex_unlock(&kvm->slots_lock);
489 return r;
490 }
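/*
 * User space sketch (vm_fd, the slot id and the bitmap are assumptions):
 * how the ioctl served above is typically driven during live migration.
 * The caller owns the bitmap, one bit per page of the memory slot; the
 * kernel copy is cleared after it has been reported, as done above.
 *
 *	struct kvm_dirty_log log = { .slot = 0, .dirty_bitmap = bitmap };
 *
 *	if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) == 0)
 *		resend every page whose bit is set in bitmap;
 */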
491
492 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
493 {
494 unsigned int i;
495 struct kvm_vcpu *vcpu;
496
497 kvm_for_each_vcpu(i, vcpu, kvm) {
498 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
499 }
500 }
501
502 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
503 {
504 int r;
505
506 if (cap->flags)
507 return -EINVAL;
508
509 switch (cap->cap) {
510 case KVM_CAP_S390_IRQCHIP:
511 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
512 kvm->arch.use_irqchip = 1;
513 r = 0;
514 break;
515 case KVM_CAP_S390_USER_SIGP:
516 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
517 kvm->arch.user_sigp = 1;
518 r = 0;
519 break;
520 case KVM_CAP_S390_VECTOR_REGISTERS:
521 mutex_lock(&kvm->lock);
522 if (kvm->created_vcpus) {
523 r = -EBUSY;
524 } else if (MACHINE_HAS_VX) {
525 set_kvm_facility(kvm->arch.model.fac_mask, 129);
526 set_kvm_facility(kvm->arch.model.fac_list, 129);
527 if (test_facility(134)) {
528 set_kvm_facility(kvm->arch.model.fac_mask, 134);
529 set_kvm_facility(kvm->arch.model.fac_list, 134);
530 }
531 if (test_facility(135)) {
532 set_kvm_facility(kvm->arch.model.fac_mask, 135);
533 set_kvm_facility(kvm->arch.model.fac_list, 135);
534 }
535 r = 0;
536 } else
537 r = -EINVAL;
538 mutex_unlock(&kvm->lock);
539 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
540 r ? "(not available)" : "(success)");
541 break;
542 case KVM_CAP_S390_RI:
543 r = -EINVAL;
544 mutex_lock(&kvm->lock);
545 if (kvm->created_vcpus) {
546 r = -EBUSY;
547 } else if (test_facility(64)) {
548 set_kvm_facility(kvm->arch.model.fac_mask, 64);
549 set_kvm_facility(kvm->arch.model.fac_list, 64);
550 r = 0;
551 }
552 mutex_unlock(&kvm->lock);
553 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
554 r ? "(not available)" : "(success)");
555 break;
556 case KVM_CAP_S390_AIS:
557 mutex_lock(&kvm->lock);
558 if (kvm->created_vcpus) {
559 r = -EBUSY;
560 } else {
561 set_kvm_facility(kvm->arch.model.fac_mask, 72);
562 set_kvm_facility(kvm->arch.model.fac_list, 72);
563 r = 0;
564 }
565 mutex_unlock(&kvm->lock);
566 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
567 r ? "(not available)" : "(success)");
568 break;
569 case KVM_CAP_S390_GS:
570 r = -EINVAL;
571 mutex_lock(&kvm->lock);
572 if (atomic_read(&kvm->online_vcpus)) {
573 r = -EBUSY;
574 } else if (test_facility(133)) {
575 set_kvm_facility(kvm->arch.model.fac_mask, 133);
576 set_kvm_facility(kvm->arch.model.fac_list, 133);
577 r = 0;
578 }
579 mutex_unlock(&kvm->lock);
580 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
581 r ? "(not available)" : "(success)");
582 break;
583 case KVM_CAP_S390_USER_STSI:
584 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
585 kvm->arch.user_stsi = 1;
586 r = 0;
587 break;
588 case KVM_CAP_S390_USER_INSTR0:
589 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
590 kvm->arch.user_instr0 = 1;
591 icpt_operexc_on_all_vcpus(kvm);
592 r = 0;
593 break;
594 default:
595 r = -EINVAL;
596 break;
597 }
598 return r;
599 }
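/*
 * User space sketch (vm_fd is an assumption): enabling one of the VM
 * capabilities handled above. Capabilities that change the CPU model,
 * such as KVM_CAP_S390_VECTOR_REGISTERS or KVM_CAP_S390_RI, must be
 * enabled before the first VCPU is created, otherwise -EBUSY is returned
 * as coded above.
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */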
600
601 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
602 {
603 int ret;
604
605 switch (attr->attr) {
606 case KVM_S390_VM_MEM_LIMIT_SIZE:
607 ret = 0;
608 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
609 kvm->arch.mem_limit);
610 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
611 ret = -EFAULT;
612 break;
613 default:
614 ret = -ENXIO;
615 break;
616 }
617 return ret;
618 }
619
620 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
621 {
622 int ret;
623 unsigned int idx;
624 switch (attr->attr) {
625 case KVM_S390_VM_MEM_ENABLE_CMMA:
626 ret = -ENXIO;
627 if (!sclp.has_cmma)
628 break;
629
630 ret = -EBUSY;
631 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
632 mutex_lock(&kvm->lock);
633 if (!kvm->created_vcpus) {
634 kvm->arch.use_cmma = 1;
635 ret = 0;
636 }
637 mutex_unlock(&kvm->lock);
638 break;
639 case KVM_S390_VM_MEM_CLR_CMMA:
640 ret = -ENXIO;
641 if (!sclp.has_cmma)
642 break;
643 ret = -EINVAL;
644 if (!kvm->arch.use_cmma)
645 break;
646
647 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
648 mutex_lock(&kvm->lock);
649 idx = srcu_read_lock(&kvm->srcu);
650 s390_reset_cmma(kvm->arch.gmap->mm);
651 srcu_read_unlock(&kvm->srcu, idx);
652 mutex_unlock(&kvm->lock);
653 ret = 0;
654 break;
655 case KVM_S390_VM_MEM_LIMIT_SIZE: {
656 unsigned long new_limit;
657
658 if (kvm_is_ucontrol(kvm))
659 return -EINVAL;
660
661 if (get_user(new_limit, (u64 __user *)attr->addr))
662 return -EFAULT;
663
664 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
665 new_limit > kvm->arch.mem_limit)
666 return -E2BIG;
667
668 if (!new_limit)
669 return -EINVAL;
670
671 /* gmap_create takes last usable address */
672 if (new_limit != KVM_S390_NO_MEM_LIMIT)
673 new_limit -= 1;
674
675 ret = -EBUSY;
676 mutex_lock(&kvm->lock);
677 if (!kvm->created_vcpus) {
678 /* gmap_create will round the limit up */
679 struct gmap *new = gmap_create(current->mm, new_limit);
680
681 if (!new) {
682 ret = -ENOMEM;
683 } else {
684 gmap_remove(kvm->arch.gmap);
685 new->private = kvm;
686 kvm->arch.gmap = new;
687 ret = 0;
688 }
689 }
690 mutex_unlock(&kvm->lock);
691 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
692 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
693 (void *) kvm->arch.gmap->asce);
694 break;
695 }
696 default:
697 ret = -ENXIO;
698 break;
699 }
700 return ret;
701 }
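/*
 * User space sketch (vm_fd and the 16 GiB value are assumptions): the
 * memory limit handled above is a VM device attribute and must be set
 * before the first VCPU is created:
 *
 *	__u64 limit = 16ULL * 1024 * 1024 * 1024;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64)(unsigned long)&limit,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */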
702
703 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
704
705 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
706 {
707 struct kvm_vcpu *vcpu;
708 int i;
709
710 if (!test_kvm_facility(kvm, 76))
711 return -EINVAL;
712
713 mutex_lock(&kvm->lock);
714 switch (attr->attr) {
715 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
716 get_random_bytes(
717 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
718 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
719 kvm->arch.crypto.aes_kw = 1;
720 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
721 break;
722 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
723 get_random_bytes(
724 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
725 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
726 kvm->arch.crypto.dea_kw = 1;
727 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
728 break;
729 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
730 kvm->arch.crypto.aes_kw = 0;
731 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
732 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
733 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
734 break;
735 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
736 kvm->arch.crypto.dea_kw = 0;
737 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
738 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
739 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
740 break;
741 default:
742 mutex_unlock(&kvm->lock);
743 return -ENXIO;
744 }
745
746 kvm_for_each_vcpu(i, vcpu, kvm) {
747 kvm_s390_vcpu_crypto_setup(vcpu);
748 exit_sie(vcpu);
749 }
750 mutex_unlock(&kvm->lock);
751 return 0;
752 }
753
754 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
755 {
756 int cx;
757 struct kvm_vcpu *vcpu;
758
759 kvm_for_each_vcpu(cx, vcpu, kvm)
760 kvm_s390_sync_request(req, vcpu);
761 }
762
763 /*
764 * Must be called with kvm->srcu held to avoid races on memslots, and with
765 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
766 */
767 static int kvm_s390_vm_start_migration(struct kvm *kvm)
768 {
769 struct kvm_s390_migration_state *mgs;
770 struct kvm_memory_slot *ms;
771 /* should be the only one */
772 struct kvm_memslots *slots;
773 unsigned long ram_pages;
774 int slotnr;
775
776 /* migration mode already enabled */
777 if (kvm->arch.migration_state)
778 return 0;
779
780 slots = kvm_memslots(kvm);
781 if (!slots || !slots->used_slots)
782 return -EINVAL;
783
784 mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
785 if (!mgs)
786 return -ENOMEM;
787 kvm->arch.migration_state = mgs;
788
789 if (kvm->arch.use_cmma) {
790 /*
791 * Get the last slot. They should be sorted by base_gfn, so the
792 * last slot is also the one at the end of the address space.
793 * We have verified above that at least one slot is present.
794 */
795 ms = slots->memslots + slots->used_slots - 1;
796 /* round up so we only use full longs */
797 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
798 /* allocate enough bytes to store all the bits */
799 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
800 if (!mgs->pgste_bitmap) {
801 kfree(mgs);
802 kvm->arch.migration_state = NULL;
803 return -ENOMEM;
804 }
805
806 mgs->bitmap_size = ram_pages;
807 atomic64_set(&mgs->dirty_pages, ram_pages);
808 /* mark all the pages in active slots as dirty */
809 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
810 ms = slots->memslots + slotnr;
811 bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
812 }
813
814 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
815 }
816 return 0;
817 }
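/*
 * Sizing example for the bitmap allocated above (the numbers are
 * illustrative): for a guest address space ending at 4 GiB,
 * ms->base_gfn + ms->npages == 0x100000, which is already a multiple of
 * BITS_PER_LONG, so ram_pages == 0x100000 and the PGSTE bitmap needs
 * ram_pages / 8 == 128 KiB of vmalloc space. Every bit that is still set
 * stands for a page whose CMMA state has not been transmitted yet, and
 * dirty_pages counts exactly those bits.
 */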
818
819 /*
820 * Must be called with kvm->lock to avoid races with ourselves and
821 * kvm_s390_vm_start_migration.
822 */
823 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
824 {
825 struct kvm_s390_migration_state *mgs;
826
827 /* migration mode already disabled */
828 if (!kvm->arch.migration_state)
829 return 0;
830 mgs = kvm->arch.migration_state;
831 kvm->arch.migration_state = NULL;
832
833 if (kvm->arch.use_cmma) {
834 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
835 vfree(mgs->pgste_bitmap);
836 }
837 kfree(mgs);
838 return 0;
839 }
840
841 static int kvm_s390_vm_set_migration(struct kvm *kvm,
842 struct kvm_device_attr *attr)
843 {
844 int idx, res = -ENXIO;
845
846 mutex_lock(&kvm->lock);
847 switch (attr->attr) {
848 case KVM_S390_VM_MIGRATION_START:
849 idx = srcu_read_lock(&kvm->srcu);
850 res = kvm_s390_vm_start_migration(kvm);
851 srcu_read_unlock(&kvm->srcu, idx);
852 break;
853 case KVM_S390_VM_MIGRATION_STOP:
854 res = kvm_s390_vm_stop_migration(kvm);
855 break;
856 default:
857 break;
858 }
859 mutex_unlock(&kvm->lock);
860
861 return res;
862 }
863
864 static int kvm_s390_vm_get_migration(struct kvm *kvm,
865 struct kvm_device_attr *attr)
866 {
867 u64 mig = (kvm->arch.migration_state != NULL);
868
869 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
870 return -ENXIO;
871
872 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
873 return -EFAULT;
874 return 0;
875 }
876
877 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
878 {
879 u8 gtod_high;
880
881 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
882 sizeof(gtod_high)))
883 return -EFAULT;
884
885 if (gtod_high != 0)
886 return -EINVAL;
887 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
888
889 return 0;
890 }
891
892 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
893 {
894 u64 gtod;
895
896 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
897 return -EFAULT;
898
899 kvm_s390_set_tod_clock(kvm, gtod);
900 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
901 return 0;
902 }
903
904 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
905 {
906 int ret;
907
908 if (attr->flags)
909 return -EINVAL;
910
911 switch (attr->attr) {
912 case KVM_S390_VM_TOD_HIGH:
913 ret = kvm_s390_set_tod_high(kvm, attr);
914 break;
915 case KVM_S390_VM_TOD_LOW:
916 ret = kvm_s390_set_tod_low(kvm, attr);
917 break;
918 default:
919 ret = -ENXIO;
920 break;
921 }
922 return ret;
923 }
924
925 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
926 {
927 u8 gtod_high = 0;
928
929 if (copy_to_user((void __user *)attr->addr, &gtod_high,
930 sizeof(gtod_high)))
931 return -EFAULT;
932 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
933
934 return 0;
935 }
936
937 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
938 {
939 u64 gtod;
940
941 gtod = kvm_s390_get_tod_clock_fast(kvm);
942 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
943 return -EFAULT;
944 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
945
946 return 0;
947 }
948
949 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
950 {
951 int ret;
952
953 if (attr->flags)
954 return -EINVAL;
955
956 switch (attr->attr) {
957 case KVM_S390_VM_TOD_HIGH:
958 ret = kvm_s390_get_tod_high(kvm, attr);
959 break;
960 case KVM_S390_VM_TOD_LOW:
961 ret = kvm_s390_get_tod_low(kvm, attr);
962 break;
963 default:
964 ret = -ENXIO;
965 break;
966 }
967 return ret;
968 }
969
970 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
971 {
972 struct kvm_s390_vm_cpu_processor *proc;
973 u16 lowest_ibc, unblocked_ibc;
974 int ret = 0;
975
976 mutex_lock(&kvm->lock);
977 if (kvm->created_vcpus) {
978 ret = -EBUSY;
979 goto out;
980 }
981 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
982 if (!proc) {
983 ret = -ENOMEM;
984 goto out;
985 }
986 if (!copy_from_user(proc, (void __user *)attr->addr,
987 sizeof(*proc))) {
988 kvm->arch.model.cpuid = proc->cpuid;
989 lowest_ibc = sclp.ibc >> 16 & 0xfff;
990 unblocked_ibc = sclp.ibc & 0xfff;
991 if (lowest_ibc && proc->ibc) {
992 if (proc->ibc > unblocked_ibc)
993 kvm->arch.model.ibc = unblocked_ibc;
994 else if (proc->ibc < lowest_ibc)
995 kvm->arch.model.ibc = lowest_ibc;
996 else
997 kvm->arch.model.ibc = proc->ibc;
998 }
999 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1000 S390_ARCH_FAC_LIST_SIZE_BYTE);
1001 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1002 kvm->arch.model.ibc,
1003 kvm->arch.model.cpuid);
1004 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1005 kvm->arch.model.fac_list[0],
1006 kvm->arch.model.fac_list[1],
1007 kvm->arch.model.fac_list[2]);
1008 } else
1009 ret = -EFAULT;
1010 kfree(proc);
1011 out:
1012 mutex_unlock(&kvm->lock);
1013 return ret;
1014 }
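/*
 * Worked example for the ibc handling above (the numbers are purely
 * illustrative): with sclp.ibc == 0x00200022, lowest_ibc is 0x020 and
 * unblocked_ibc is 0x022. A requested ibc of 0x030 is clamped down to
 * 0x022, a request of 0x010 is raised to 0x020, anything in between is
 * used unchanged, and a request of 0 leaves kvm->arch.model.ibc alone.
 */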
1015
1016 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1017 struct kvm_device_attr *attr)
1018 {
1019 struct kvm_s390_vm_cpu_feat data;
1020 int ret = -EBUSY;
1021
1022 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1023 return -EFAULT;
1024 if (!bitmap_subset((unsigned long *) data.feat,
1025 kvm_s390_available_cpu_feat,
1026 KVM_S390_VM_CPU_FEAT_NR_BITS))
1027 return -EINVAL;
1028
1029 mutex_lock(&kvm->lock);
1030 if (!atomic_read(&kvm->online_vcpus)) {
1031 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1032 KVM_S390_VM_CPU_FEAT_NR_BITS);
1033 ret = 0;
1034 }
1035 mutex_unlock(&kvm->lock);
1036 return ret;
1037 }
1038
1039 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1040 struct kvm_device_attr *attr)
1041 {
1042 /*
1043 * Once supported by kernel + hw, we have to store the subfunctions
1044 * in kvm->arch and remember that user space configured them.
1045 */
1046 return -ENXIO;
1047 }
1048
1049 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1050 {
1051 int ret = -ENXIO;
1052
1053 switch (attr->attr) {
1054 case KVM_S390_VM_CPU_PROCESSOR:
1055 ret = kvm_s390_set_processor(kvm, attr);
1056 break;
1057 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1058 ret = kvm_s390_set_processor_feat(kvm, attr);
1059 break;
1060 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1061 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1062 break;
1063 }
1064 return ret;
1065 }
1066
1067 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1068 {
1069 struct kvm_s390_vm_cpu_processor *proc;
1070 int ret = 0;
1071
1072 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1073 if (!proc) {
1074 ret = -ENOMEM;
1075 goto out;
1076 }
1077 proc->cpuid = kvm->arch.model.cpuid;
1078 proc->ibc = kvm->arch.model.ibc;
1079 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1080 S390_ARCH_FAC_LIST_SIZE_BYTE);
1081 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1082 kvm->arch.model.ibc,
1083 kvm->arch.model.cpuid);
1084 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1085 kvm->arch.model.fac_list[0],
1086 kvm->arch.model.fac_list[1],
1087 kvm->arch.model.fac_list[2]);
1088 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1089 ret = -EFAULT;
1090 kfree(proc);
1091 out:
1092 return ret;
1093 }
1094
1095 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1096 {
1097 struct kvm_s390_vm_cpu_machine *mach;
1098 int ret = 0;
1099
1100 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1101 if (!mach) {
1102 ret = -ENOMEM;
1103 goto out;
1104 }
1105 get_cpu_id((struct cpuid *) &mach->cpuid);
1106 mach->ibc = sclp.ibc;
1107 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1108 S390_ARCH_FAC_LIST_SIZE_BYTE);
1109 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1110 sizeof(S390_lowcore.stfle_fac_list));
1111 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1112 kvm->arch.model.ibc,
1113 kvm->arch.model.cpuid);
1114 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1115 mach->fac_mask[0],
1116 mach->fac_mask[1],
1117 mach->fac_mask[2]);
1118 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1119 mach->fac_list[0],
1120 mach->fac_list[1],
1121 mach->fac_list[2]);
1122 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1123 ret = -EFAULT;
1124 kfree(mach);
1125 out:
1126 return ret;
1127 }
1128
1129 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1130 struct kvm_device_attr *attr)
1131 {
1132 struct kvm_s390_vm_cpu_feat data;
1133
1134 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1135 KVM_S390_VM_CPU_FEAT_NR_BITS);
1136 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1137 return -EFAULT;
1138 return 0;
1139 }
1140
1141 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1142 struct kvm_device_attr *attr)
1143 {
1144 struct kvm_s390_vm_cpu_feat data;
1145
1146 bitmap_copy((unsigned long *) data.feat,
1147 kvm_s390_available_cpu_feat,
1148 KVM_S390_VM_CPU_FEAT_NR_BITS);
1149 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1150 return -EFAULT;
1151 return 0;
1152 }
1153
1154 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1155 struct kvm_device_attr *attr)
1156 {
1157 /*
1158 * Once we can actually configure subfunctions (kernel + hw support),
1159 * we have to check if they were already set by user space, if so copy
1160 * them from kvm->arch.
1161 */
1162 return -ENXIO;
1163 }
1164
1165 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1166 struct kvm_device_attr *attr)
1167 {
1168 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1169 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1170 return -EFAULT;
1171 return 0;
1172 }
1173 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1174 {
1175 int ret = -ENXIO;
1176
1177 switch (attr->attr) {
1178 case KVM_S390_VM_CPU_PROCESSOR:
1179 ret = kvm_s390_get_processor(kvm, attr);
1180 break;
1181 case KVM_S390_VM_CPU_MACHINE:
1182 ret = kvm_s390_get_machine(kvm, attr);
1183 break;
1184 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1185 ret = kvm_s390_get_processor_feat(kvm, attr);
1186 break;
1187 case KVM_S390_VM_CPU_MACHINE_FEAT:
1188 ret = kvm_s390_get_machine_feat(kvm, attr);
1189 break;
1190 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1191 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1192 break;
1193 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1194 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1195 break;
1196 }
1197 return ret;
1198 }
1199
1200 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1201 {
1202 int ret;
1203
1204 switch (attr->group) {
1205 case KVM_S390_VM_MEM_CTRL:
1206 ret = kvm_s390_set_mem_control(kvm, attr);
1207 break;
1208 case KVM_S390_VM_TOD:
1209 ret = kvm_s390_set_tod(kvm, attr);
1210 break;
1211 case KVM_S390_VM_CPU_MODEL:
1212 ret = kvm_s390_set_cpu_model(kvm, attr);
1213 break;
1214 case KVM_S390_VM_CRYPTO:
1215 ret = kvm_s390_vm_set_crypto(kvm, attr);
1216 break;
1217 case KVM_S390_VM_MIGRATION:
1218 ret = kvm_s390_vm_set_migration(kvm, attr);
1219 break;
1220 default:
1221 ret = -ENXIO;
1222 break;
1223 }
1224
1225 return ret;
1226 }
1227
1228 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1229 {
1230 int ret;
1231
1232 switch (attr->group) {
1233 case KVM_S390_VM_MEM_CTRL:
1234 ret = kvm_s390_get_mem_control(kvm, attr);
1235 break;
1236 case KVM_S390_VM_TOD:
1237 ret = kvm_s390_get_tod(kvm, attr);
1238 break;
1239 case KVM_S390_VM_CPU_MODEL:
1240 ret = kvm_s390_get_cpu_model(kvm, attr);
1241 break;
1242 case KVM_S390_VM_MIGRATION:
1243 ret = kvm_s390_vm_get_migration(kvm, attr);
1244 break;
1245 default:
1246 ret = -ENXIO;
1247 break;
1248 }
1249
1250 return ret;
1251 }
1252
1253 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1254 {
1255 int ret;
1256
1257 switch (attr->group) {
1258 case KVM_S390_VM_MEM_CTRL:
1259 switch (attr->attr) {
1260 case KVM_S390_VM_MEM_ENABLE_CMMA:
1261 case KVM_S390_VM_MEM_CLR_CMMA:
1262 ret = sclp.has_cmma ? 0 : -ENXIO;
1263 break;
1264 case KVM_S390_VM_MEM_LIMIT_SIZE:
1265 ret = 0;
1266 break;
1267 default:
1268 ret = -ENXIO;
1269 break;
1270 }
1271 break;
1272 case KVM_S390_VM_TOD:
1273 switch (attr->attr) {
1274 case KVM_S390_VM_TOD_LOW:
1275 case KVM_S390_VM_TOD_HIGH:
1276 ret = 0;
1277 break;
1278 default:
1279 ret = -ENXIO;
1280 break;
1281 }
1282 break;
1283 case KVM_S390_VM_CPU_MODEL:
1284 switch (attr->attr) {
1285 case KVM_S390_VM_CPU_PROCESSOR:
1286 case KVM_S390_VM_CPU_MACHINE:
1287 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1288 case KVM_S390_VM_CPU_MACHINE_FEAT:
1289 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1290 ret = 0;
1291 break;
1292 /* configuring subfunctions is not supported yet */
1293 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1294 default:
1295 ret = -ENXIO;
1296 break;
1297 }
1298 break;
1299 case KVM_S390_VM_CRYPTO:
1300 switch (attr->attr) {
1301 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1302 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1303 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1304 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1305 ret = 0;
1306 break;
1307 default:
1308 ret = -ENXIO;
1309 break;
1310 }
1311 break;
1312 case KVM_S390_VM_MIGRATION:
1313 ret = 0;
1314 break;
1315 default:
1316 ret = -ENXIO;
1317 break;
1318 }
1319
1320 return ret;
1321 }
1322
1323 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1324 {
1325 uint8_t *keys;
1326 uint64_t hva;
1327 int i, r = 0;
1328
1329 if (args->flags != 0)
1330 return -EINVAL;
1331
1332 /* Is this guest using storage keys? */
1333 if (!mm_use_skey(current->mm))
1334 return KVM_S390_GET_SKEYS_NONE;
1335
1336 /* Enforce sane limit on memory allocation */
1337 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1338 return -EINVAL;
1339
1340 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1341 if (!keys)
1342 return -ENOMEM;
1343
1344 down_read(&current->mm->mmap_sem);
1345 for (i = 0; i < args->count; i++) {
1346 hva = gfn_to_hva(kvm, args->start_gfn + i);
1347 if (kvm_is_error_hva(hva)) {
1348 r = -EFAULT;
1349 break;
1350 }
1351
1352 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1353 if (r)
1354 break;
1355 }
1356 up_read(&current->mm->mmap_sem);
1357
1358 if (!r) {
1359 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1360 sizeof(uint8_t) * args->count);
1361 if (r)
1362 r = -EFAULT;
1363 }
1364
1365 kvfree(keys);
1366 return r;
1367 }
1368
1369 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1370 {
1371 uint8_t *keys;
1372 uint64_t hva;
1373 int i, r = 0;
1374
1375 if (args->flags != 0)
1376 return -EINVAL;
1377
1378 /* Enforce sane limit on memory allocation */
1379 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1380 return -EINVAL;
1381
1382 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1383 if (!keys)
1384 return -ENOMEM;
1385
1386 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1387 sizeof(uint8_t) * args->count);
1388 if (r) {
1389 r = -EFAULT;
1390 goto out;
1391 }
1392
1393 /* Enable storage key handling for the guest */
1394 r = s390_enable_skey();
1395 if (r)
1396 goto out;
1397
1398 down_read(&current->mm->mmap_sem);
1399 for (i = 0; i < args->count; i++) {
1400 hva = gfn_to_hva(kvm, args->start_gfn + i);
1401 if (kvm_is_error_hva(hva)) {
1402 r = -EFAULT;
1403 break;
1404 }
1405
1406 /* Lowest order bit is reserved */
1407 if (keys[i] & 0x01) {
1408 r = -EINVAL;
1409 break;
1410 }
1411
1412 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1413 if (r)
1414 break;
1415 }
1416 up_read(&current->mm->mmap_sem);
1417 out:
1418 kvfree(keys);
1419 return r;
1420 }
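/*
 * User space sketch (vm_fd, the buffer and the gfn range are assumptions):
 * the two ioctls above move raw storage keys in bulk, one byte per guest
 * page; the lowest order bit of each byte is reserved and rejected by
 * KVM_S390_SET_SKEYS, and KVM_S390_GET_SKEYS returns
 * KVM_S390_GET_SKEYS_NONE when the guest does not use storage keys at all.
 *
 *	uint8_t keys[256];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn     = 0,
 *		.count         = 256,
 *		.skeydata_addr = (__u64)(unsigned long)keys,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 */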
1421
1422 /*
1423 * Base address and length must be sent at the start of each block, therefore
1424 * it's cheaper to send some clean data, as long as it's less than the size of
1425 * two longs.
1426 */
1427 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
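/*
 * Worked example: describing a new block takes a fresh base address and
 * length, i.e. two 8 byte values. Since every page contributes one result
 * byte, a gap of up to 2 * sizeof(void *) == 16 clean pages costs at most
 * 16 bytes of zero values, never more than the header a new block would
 * need, so such gaps are simply included in the current block.
 */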
1428 /* for consistency */
1429 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1430
1431 /*
1432 * This function searches for the next page with dirty CMMA attributes, and
1433 * saves the attributes in the buffer up to either the end of the buffer or
1434 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1435 * no trailing clean bytes are saved.
1436 * In case no dirty bits were found, or if CMMA was not enabled or used, the
1437 * output buffer will indicate 0 as length.
1438 */
1439 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1440 struct kvm_s390_cmma_log *args)
1441 {
1442 struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1443 unsigned long bufsize, hva, pgstev, i, next, cur;
1444 int srcu_idx, peek, r = 0, rr;
1445 u8 *res;
1446
1447 cur = args->start_gfn;
1448 i = next = pgstev = 0;
1449
1450 if (unlikely(!kvm->arch.use_cmma))
1451 return -ENXIO;
1452 /* Invalid/unsupported flags were specified */
1453 if (args->flags & ~KVM_S390_CMMA_PEEK)
1454 return -EINVAL;
1455 /* Migration mode query, and we are not doing a migration */
1456 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1457 if (!peek && !s)
1458 return -EINVAL;
1459 /* CMMA is disabled or was not used, or the buffer has length zero */
1460 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1461 if (!bufsize || !kvm->mm->context.use_cmma) {
1462 memset(args, 0, sizeof(*args));
1463 return 0;
1464 }
1465
1466 if (!peek) {
1467 /* We are not peeking, and there are no dirty pages */
1468 if (!atomic64_read(&s->dirty_pages)) {
1469 memset(args, 0, sizeof(*args));
1470 return 0;
1471 }
1472 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1473 args->start_gfn);
1474 if (cur >= s->bitmap_size) /* nothing found, loop back */
1475 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1476 if (cur >= s->bitmap_size) { /* again! (very unlikely) */
1477 memset(args, 0, sizeof(*args));
1478 return 0;
1479 }
1480 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1481 }
1482
1483 res = vmalloc(bufsize);
1484 if (!res)
1485 return -ENOMEM;
1486
1487 args->start_gfn = cur;
1488
1489 down_read(&kvm->mm->mmap_sem);
1490 srcu_idx = srcu_read_lock(&kvm->srcu);
1491 while (i < bufsize) {
1492 hva = gfn_to_hva(kvm, cur);
1493 if (kvm_is_error_hva(hva)) {
1494 r = -EFAULT;
1495 break;
1496 }
1497 /* decrement only if we actually flipped the bit to 0 */
1498 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1499 atomic64_dec(&s->dirty_pages);
1500 r = get_pgste(kvm->mm, hva, &pgstev);
1501 if (r < 0)
1502 pgstev = 0;
1503 /* save the value */
1504 res[i++] = (pgstev >> 24) & 0x3;
1505 /*
1506 * if the next bit is too far away, stop.
1507 * if we reached the previous "next", find the next one
1508 */
1509 if (!peek) {
1510 if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1511 break;
1512 if (cur == next)
1513 next = find_next_bit(s->pgste_bitmap,
1514 s->bitmap_size, cur + 1);
1515 /* reached the end of the bitmap or of the buffer, stop */
1516 if ((next >= s->bitmap_size) ||
1517 (next >= args->start_gfn + bufsize))
1518 break;
1519 }
1520 cur++;
1521 }
1522 srcu_read_unlock(&kvm->srcu, srcu_idx);
1523 up_read(&kvm->mm->mmap_sem);
1524 args->count = i;
1525 args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1526
1527 rr = copy_to_user((void __user *)args->values, res, args->count);
1528 if (rr)
1529 r = -EFAULT;
1530
1531 vfree(res);
1532 return r;
1533 }
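/*
 * User space sketch (vm_fd and the buffer size are assumptions): fetching
 * the CMMA values produced above. Each returned byte is the page usage
 * state, (pgstev >> 24) & 0x3; args.count comes back as the number of
 * bytes actually filled and args.remaining as the number of dirty pages
 * still to be fetched. Adding KVM_S390_CMMA_PEEK to flags reads the
 * values without consuming dirty bits.
 *
 *	uint8_t values[4096];
 *	struct kvm_s390_cmma_log args = {
 *		.start_gfn = 0,
 *		.count     = sizeof(values),
 *		.flags     = 0,
 *		.values    = (__u64)(unsigned long)values,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &args);
 */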
1534
1535 /*
1536 * This function sets the CMMA attributes for the given pages. If the input
1537 * buffer has zero length, no action is taken, otherwise the attributes are
1538 * set and the mm->context.use_cmma flag is set.
1539 */
1540 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1541 const struct kvm_s390_cmma_log *args)
1542 {
1543 unsigned long hva, mask, pgstev, i;
1544 uint8_t *bits;
1545 int srcu_idx, r = 0;
1546
1547 mask = args->mask;
1548
1549 if (!kvm->arch.use_cmma)
1550 return -ENXIO;
1551 /* invalid/unsupported flags */
1552 if (args->flags != 0)
1553 return -EINVAL;
1554 /* Enforce sane limit on memory allocation */
1555 if (args->count > KVM_S390_CMMA_SIZE_MAX)
1556 return -EINVAL;
1557 /* Nothing to do */
1558 if (args->count == 0)
1559 return 0;
1560
1561 bits = vmalloc(sizeof(*bits) * args->count);
1562 if (!bits)
1563 return -ENOMEM;
1564
1565 r = copy_from_user(bits, (void __user *)args->values, args->count);
1566 if (r) {
1567 r = -EFAULT;
1568 goto out;
1569 }
1570
1571 down_read(&kvm->mm->mmap_sem);
1572 srcu_idx = srcu_read_lock(&kvm->srcu);
1573 for (i = 0; i < args->count; i++) {
1574 hva = gfn_to_hva(kvm, args->start_gfn + i);
1575 if (kvm_is_error_hva(hva)) {
1576 r = -EFAULT;
1577 break;
1578 }
1579
1580 pgstev = bits[i];
1581 pgstev = pgstev << 24;
1582 mask &= _PGSTE_GPS_USAGE_MASK;
1583 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1584 }
1585 srcu_read_unlock(&kvm->srcu, srcu_idx);
1586 up_read(&kvm->mm->mmap_sem);
1587
1588 if (!kvm->mm->context.use_cmma) {
1589 down_write(&kvm->mm->mmap_sem);
1590 kvm->mm->context.use_cmma = 1;
1591 up_write(&kvm->mm->mmap_sem);
1592 }
1593 out:
1594 vfree(bits);
1595 return r;
1596 }
1597
1598 long kvm_arch_vm_ioctl(struct file *filp,
1599 unsigned int ioctl, unsigned long arg)
1600 {
1601 struct kvm *kvm = filp->private_data;
1602 void __user *argp = (void __user *)arg;
1603 struct kvm_device_attr attr;
1604 int r;
1605
1606 switch (ioctl) {
1607 case KVM_S390_INTERRUPT: {
1608 struct kvm_s390_interrupt s390int;
1609
1610 r = -EFAULT;
1611 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1612 break;
1613 r = kvm_s390_inject_vm(kvm, &s390int);
1614 break;
1615 }
1616 case KVM_ENABLE_CAP: {
1617 struct kvm_enable_cap cap;
1618 r = -EFAULT;
1619 if (copy_from_user(&cap, argp, sizeof(cap)))
1620 break;
1621 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1622 break;
1623 }
1624 case KVM_CREATE_IRQCHIP: {
1625 struct kvm_irq_routing_entry routing;
1626
1627 r = -EINVAL;
1628 if (kvm->arch.use_irqchip) {
1629 /* Set up dummy routing. */
1630 memset(&routing, 0, sizeof(routing));
1631 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1632 }
1633 break;
1634 }
1635 case KVM_SET_DEVICE_ATTR: {
1636 r = -EFAULT;
1637 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1638 break;
1639 r = kvm_s390_vm_set_attr(kvm, &attr);
1640 break;
1641 }
1642 case KVM_GET_DEVICE_ATTR: {
1643 r = -EFAULT;
1644 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1645 break;
1646 r = kvm_s390_vm_get_attr(kvm, &attr);
1647 break;
1648 }
1649 case KVM_HAS_DEVICE_ATTR: {
1650 r = -EFAULT;
1651 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1652 break;
1653 r = kvm_s390_vm_has_attr(kvm, &attr);
1654 break;
1655 }
1656 case KVM_S390_GET_SKEYS: {
1657 struct kvm_s390_skeys args;
1658
1659 r = -EFAULT;
1660 if (copy_from_user(&args, argp,
1661 sizeof(struct kvm_s390_skeys)))
1662 break;
1663 r = kvm_s390_get_skeys(kvm, &args);
1664 break;
1665 }
1666 case KVM_S390_SET_SKEYS: {
1667 struct kvm_s390_skeys args;
1668
1669 r = -EFAULT;
1670 if (copy_from_user(&args, argp,
1671 sizeof(struct kvm_s390_skeys)))
1672 break;
1673 r = kvm_s390_set_skeys(kvm, &args);
1674 break;
1675 }
1676 case KVM_S390_GET_CMMA_BITS: {
1677 struct kvm_s390_cmma_log args;
1678
1679 r = -EFAULT;
1680 if (copy_from_user(&args, argp, sizeof(args)))
1681 break;
1682 r = kvm_s390_get_cmma_bits(kvm, &args);
1683 if (!r) {
1684 r = copy_to_user(argp, &args, sizeof(args));
1685 if (r)
1686 r = -EFAULT;
1687 }
1688 break;
1689 }
1690 case KVM_S390_SET_CMMA_BITS: {
1691 struct kvm_s390_cmma_log args;
1692
1693 r = -EFAULT;
1694 if (copy_from_user(&args, argp, sizeof(args)))
1695 break;
1696 r = kvm_s390_set_cmma_bits(kvm, &args);
1697 break;
1698 }
1699 default:
1700 r = -ENOTTY;
1701 }
1702
1703 return r;
1704 }
1705
1706 static int kvm_s390_query_ap_config(u8 *config)
1707 {
1708 u32 fcn_code = 0x04000000UL;
1709 u32 cc = 0;
1710
1711 memset(config, 0, 128);
1712 asm volatile(
1713 "lgr 0,%1\n"
1714 "lgr 2,%2\n"
1715 ".long 0xb2af0000\n" /* PQAP(QCI) */
1716 "0: ipm %0\n"
1717 "srl %0,28\n"
1718 "1:\n"
1719 EX_TABLE(0b, 1b)
1720 : "+r" (cc)
1721 : "r" (fcn_code), "r" (config)
1722 : "cc", "0", "2", "memory"
1723 );
1724
1725 return cc;
1726 }
1727
1728 static int kvm_s390_apxa_installed(void)
1729 {
1730 u8 config[128];
1731 int cc;
1732
1733 if (test_facility(12)) {
1734 cc = kvm_s390_query_ap_config(config);
1735
1736 if (cc)
1737 pr_err("PQAP(QCI) failed with cc=%d", cc);
1738 else
1739 return config[0] & 0x40;
1740 }
1741
1742 return 0;
1743 }
1744
1745 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1746 {
1747 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1748
1749 if (kvm_s390_apxa_installed())
1750 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1751 else
1752 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1753 }
1754
1755 static u64 kvm_s390_get_initial_cpuid(void)
1756 {
1757 struct cpuid cpuid;
1758
1759 get_cpu_id(&cpuid);
1760 cpuid.version = 0xff;
1761 return *((u64 *) &cpuid);
1762 }
1763
1764 static void kvm_s390_crypto_init(struct kvm *kvm)
1765 {
1766 if (!test_kvm_facility(kvm, 76))
1767 return;
1768
1769 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1770 kvm_s390_set_crycb_format(kvm);
1771
1772 /* Enable AES/DEA protected key functions by default */
1773 kvm->arch.crypto.aes_kw = 1;
1774 kvm->arch.crypto.dea_kw = 1;
1775 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1776 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1777 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1778 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1779 }
1780
1781 static void sca_dispose(struct kvm *kvm)
1782 {
1783 if (kvm->arch.use_esca)
1784 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1785 else
1786 free_page((unsigned long)(kvm->arch.sca));
1787 kvm->arch.sca = NULL;
1788 }
1789
1790 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1791 {
1792 gfp_t alloc_flags = GFP_KERNEL;
1793 int i, rc;
1794 char debug_name[16];
1795 static unsigned long sca_offset;
1796
1797 rc = -EINVAL;
1798 #ifdef CONFIG_KVM_S390_UCONTROL
1799 if (type & ~KVM_VM_S390_UCONTROL)
1800 goto out_err;
1801 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1802 goto out_err;
1803 #else
1804 if (type)
1805 goto out_err;
1806 #endif
1807
1808 rc = s390_enable_sie();
1809 if (rc)
1810 goto out_err;
1811
1812 rc = -ENOMEM;
1813
1814 ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1815
1816 kvm->arch.use_esca = 0; /* start with basic SCA */
1817 if (!sclp.has_64bscao)
1818 alloc_flags |= GFP_DMA;
1819 rwlock_init(&kvm->arch.sca_lock);
1820 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1821 if (!kvm->arch.sca)
1822 goto out_err;
1823 spin_lock(&kvm_lock);
1824 sca_offset += 16;
1825 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1826 sca_offset = 0;
1827 kvm->arch.sca = (struct bsca_block *)
1828 ((char *) kvm->arch.sca + sca_offset);
1829 spin_unlock(&kvm_lock);
1830
1831 sprintf(debug_name, "kvm-%u", current->pid);
1832
1833 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1834 if (!kvm->arch.dbf)
1835 goto out_err;
1836
1837 kvm->arch.sie_page2 =
1838 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1839 if (!kvm->arch.sie_page2)
1840 goto out_err;
1841
1842 /* Populate the facility mask initially. */
1843 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1844 sizeof(S390_lowcore.stfle_fac_list));
1845 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1846 if (i < kvm_s390_fac_list_mask_size())
1847 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1848 else
1849 kvm->arch.model.fac_mask[i] = 0UL;
1850 }
1851
1852 /* Populate the facility list initially. */
1853 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1854 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1855 S390_ARCH_FAC_LIST_SIZE_BYTE);
1856
1857 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1858 set_kvm_facility(kvm->arch.model.fac_list, 74);
1859
1860 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1861 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1862
1863 kvm_s390_crypto_init(kvm);
1864
1865 mutex_init(&kvm->arch.float_int.ais_lock);
1866 kvm->arch.float_int.simm = 0;
1867 kvm->arch.float_int.nimm = 0;
1868 spin_lock_init(&kvm->arch.float_int.lock);
1869 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1870 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1871 init_waitqueue_head(&kvm->arch.ipte_wq);
1872 mutex_init(&kvm->arch.ipte_mutex);
1873
1874 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1875 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1876
1877 if (type & KVM_VM_S390_UCONTROL) {
1878 kvm->arch.gmap = NULL;
1879 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1880 } else {
1881 if (sclp.hamax == U64_MAX)
1882 kvm->arch.mem_limit = TASK_SIZE_MAX;
1883 else
1884 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1885 sclp.hamax + 1);
1886 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1887 if (!kvm->arch.gmap)
1888 goto out_err;
1889 kvm->arch.gmap->private = kvm;
1890 kvm->arch.gmap->pfault_enabled = 0;
1891 }
1892
1893 kvm->arch.css_support = 0;
1894 kvm->arch.use_irqchip = 0;
1895 kvm->arch.epoch = 0;
1896
1897 spin_lock_init(&kvm->arch.start_stop_lock);
1898 kvm_s390_vsie_init(kvm);
1899 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1900
1901 return 0;
1902 out_err:
1903 free_page((unsigned long)kvm->arch.sie_page2);
1904 debug_unregister(kvm->arch.dbf);
1905 sca_dispose(kvm);
1906 KVM_EVENT(3, "creation of vm failed: %d", rc);
1907 return rc;
1908 }
1909
1910 bool kvm_arch_has_vcpu_debugfs(void)
1911 {
1912 return false;
1913 }
1914
1915 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1916 {
1917 return 0;
1918 }
1919
1920 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1921 {
1922 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1923 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1924 kvm_s390_clear_local_irqs(vcpu);
1925 kvm_clear_async_pf_completion_queue(vcpu);
1926 if (!kvm_is_ucontrol(vcpu->kvm))
1927 sca_del_vcpu(vcpu);
1928
1929 if (kvm_is_ucontrol(vcpu->kvm))
1930 gmap_remove(vcpu->arch.gmap);
1931
1932 if (vcpu->kvm->arch.use_cmma)
1933 kvm_s390_vcpu_unsetup_cmma(vcpu);
1934 free_page((unsigned long)(vcpu->arch.sie_block));
1935
1936 kvm_vcpu_uninit(vcpu);
1937 kmem_cache_free(kvm_vcpu_cache, vcpu);
1938 }
1939
1940 static void kvm_free_vcpus(struct kvm *kvm)
1941 {
1942 unsigned int i;
1943 struct kvm_vcpu *vcpu;
1944
1945 kvm_for_each_vcpu(i, vcpu, kvm)
1946 kvm_arch_vcpu_destroy(vcpu);
1947
1948 mutex_lock(&kvm->lock);
1949 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1950 kvm->vcpus[i] = NULL;
1951
1952 atomic_set(&kvm->online_vcpus, 0);
1953 mutex_unlock(&kvm->lock);
1954 }
1955
1956 void kvm_arch_destroy_vm(struct kvm *kvm)
1957 {
1958 kvm_free_vcpus(kvm);
1959 sca_dispose(kvm);
1960 debug_unregister(kvm->arch.dbf);
1961 free_page((unsigned long)kvm->arch.sie_page2);
1962 if (!kvm_is_ucontrol(kvm))
1963 gmap_remove(kvm->arch.gmap);
1964 kvm_s390_destroy_adapters(kvm);
1965 kvm_s390_clear_float_irqs(kvm);
1966 kvm_s390_vsie_destroy(kvm);
1967 if (kvm->arch.migration_state) {
1968 vfree(kvm->arch.migration_state->pgste_bitmap);
1969 kfree(kvm->arch.migration_state);
1970 }
1971 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1972 }
1973
1974 /* Section: vcpu related */
1975 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1976 {
1977 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1978 if (!vcpu->arch.gmap)
1979 return -ENOMEM;
1980 vcpu->arch.gmap->private = vcpu->kvm;
1981
1982 return 0;
1983 }
1984
1985 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1986 {
1987 if (!kvm_s390_use_sca_entries())
1988 return;
1989 read_lock(&vcpu->kvm->arch.sca_lock);
1990 if (vcpu->kvm->arch.use_esca) {
1991 struct esca_block *sca = vcpu->kvm->arch.sca;
1992
1993 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1994 sca->cpu[vcpu->vcpu_id].sda = 0;
1995 } else {
1996 struct bsca_block *sca = vcpu->kvm->arch.sca;
1997
1998 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1999 sca->cpu[vcpu->vcpu_id].sda = 0;
2000 }
2001 read_unlock(&vcpu->kvm->arch.sca_lock);
2002 }
2003
2004 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2005 {
2006 if (!kvm_s390_use_sca_entries()) {
2007 struct bsca_block *sca = vcpu->kvm->arch.sca;
2008
2009 /* we still need the basic sca for the ipte control */
2010 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2011 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2012 }
2013 read_lock(&vcpu->kvm->arch.sca_lock);
2014 if (vcpu->kvm->arch.use_esca) {
2015 struct esca_block *sca = vcpu->kvm->arch.sca;
2016
2017 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2018 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2019 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2020 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2021 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2022 } else {
2023 struct bsca_block *sca = vcpu->kvm->arch.sca;
2024
2025 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2026 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2027 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2028 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2029 }
2030 read_unlock(&vcpu->kvm->arch.sca_lock);
2031 }
2032
2033 /* Basic SCA to Extended SCA data copy routines */
2034 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2035 {
2036 d->sda = s->sda;
2037 d->sigp_ctrl.c = s->sigp_ctrl.c;
2038 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2039 }
2040
2041 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2042 {
2043 int i;
2044
2045 d->ipte_control = s->ipte_control;
2046 d->mcn[0] = s->mcn;
2047 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2048 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2049 }
2050
2051 static int sca_switch_to_extended(struct kvm *kvm)
2052 {
2053 struct bsca_block *old_sca = kvm->arch.sca;
2054 struct esca_block *new_sca;
2055 struct kvm_vcpu *vcpu;
2056 unsigned int vcpu_idx;
2057 u32 scaol, scaoh;
2058
2059 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2060 if (!new_sca)
2061 return -ENOMEM;
2062
2063 scaoh = (u32)((u64)(new_sca) >> 32);
2064 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2065
2066 kvm_s390_vcpu_block_all(kvm);
2067 write_lock(&kvm->arch.sca_lock);
2068
2069 sca_copy_b_to_e(new_sca, old_sca);
2070
2071 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2072 vcpu->arch.sie_block->scaoh = scaoh;
2073 vcpu->arch.sie_block->scaol = scaol;
2074 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2075 }
2076 kvm->arch.sca = new_sca;
2077 kvm->arch.use_esca = 1;
2078
2079 write_unlock(&kvm->arch.sca_lock);
2080 kvm_s390_vcpu_unblock_all(kvm);
2081
2082 free_page((unsigned long)old_sca);
2083
2084 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2085 old_sca, kvm->arch.sca);
2086 return 0;
2087 }
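
/*
 * For illustration, with a (page aligned) new_sca at 0x0000000123456000
 * the split above yields
 *   scaoh = 0x00000001   (bits 63:32 of the SCA origin)
 *   scaol = 0x23456000   (bits 31:0, with the low 6 bits masked off)
 * which is the same scaoh/scaol pair that sca_add_vcpu() stores into
 * each VCPU's SIE control block.
 */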
2088
2089 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2090 {
2091 int rc;
2092
2093 if (!kvm_s390_use_sca_entries()) {
2094 if (id < KVM_MAX_VCPUS)
2095 return true;
2096 return false;
2097 }
2098 if (id < KVM_S390_BSCA_CPU_SLOTS)
2099 return true;
2100 if (!sclp.has_esca || !sclp.has_64bscao)
2101 return false;
2102
2103 mutex_lock(&kvm->lock);
2104 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2105 mutex_unlock(&kvm->lock);
2106
2107 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2108 }
2109
2110 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2111 {
2112 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2113 kvm_clear_async_pf_completion_queue(vcpu);
2114 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2115 KVM_SYNC_GPRS |
2116 KVM_SYNC_ACRS |
2117 KVM_SYNC_CRS |
2118 KVM_SYNC_ARCH0 |
2119 KVM_SYNC_PFAULT;
2120 kvm_s390_set_prefix(vcpu, 0);
2121 if (test_kvm_facility(vcpu->kvm, 64))
2122 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2123 if (test_kvm_facility(vcpu->kvm, 133))
2124 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2125 /* fprs can be synchronized via vrs, even if the guest has no vx. With
2126 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2127 */
2128 if (MACHINE_HAS_VX)
2129 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2130 else
2131 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2132
2133 if (kvm_is_ucontrol(vcpu->kvm))
2134 return __kvm_ucontrol_vcpu_init(vcpu);
2135
2136 return 0;
2137 }
2138
2139 /* must be called with preemption disabled to protect from TOD sync and vcpu_load/put */
2140 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2141 {
2142 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2143 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2144 vcpu->arch.cputm_start = get_tod_clock_fast();
2145 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2146 }
2147
2148 /* must be called with preemption disabled to protect from TOD sync and vcpu_load/put */
2149 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2150 {
2151 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2152 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2153 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2154 vcpu->arch.cputm_start = 0;
2155 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2156 }
2157
2158 /* must be called with preemption disabled to protect from TOD sync and vcpu_load/put */
2159 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2160 {
2161 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2162 vcpu->arch.cputm_enabled = true;
2163 __start_cpu_timer_accounting(vcpu);
2164 }
2165
2166 /* must be called with preemption disabled to protect from TOD sync and vcpu_load/put */
2167 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2168 {
2169 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2170 __stop_cpu_timer_accounting(vcpu);
2171 vcpu->arch.cputm_enabled = false;
2172 }
2173
2174 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2175 {
2176 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2177 __enable_cpu_timer_accounting(vcpu);
2178 preempt_enable();
2179 }
2180
2181 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2182 {
2183 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2184 __disable_cpu_timer_accounting(vcpu);
2185 preempt_enable();
2186 }
2187
2188 /* set the cpu timer - may only be called from the VCPU thread itself */
2189 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2190 {
2191 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2192 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2193 if (vcpu->arch.cputm_enabled)
2194 vcpu->arch.cputm_start = get_tod_clock_fast();
2195 vcpu->arch.sie_block->cputm = cputm;
2196 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2197 preempt_enable();
2198 }
2199
2200 /* update and get the cpu timer - can also be called from other VCPU threads */
2201 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2202 {
2203 unsigned int seq;
2204 __u64 value;
2205
2206 if (unlikely(!vcpu->arch.cputm_enabled))
2207 return vcpu->arch.sie_block->cputm;
2208
2209 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2210 do {
2211 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2212 /*
2213 * If the writer would ever execute a read in the critical
2214 * section, e.g. in irq context, we have a deadlock.
2215 */
2216 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2217 value = vcpu->arch.sie_block->cputm;
2218 /* if cputm_start is 0, accounting is being started/stopped */
2219 if (likely(vcpu->arch.cputm_start))
2220 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2221 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2222 preempt_enable();
2223 return value;
2224 }
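
/*
 * A rough worked example of the accounting above: if the VCPU entered
 * SIE with sie_block->cputm == 1000 ticks left and cputm_start was
 * sampled at TOD time T, a reader at TOD time T + 300 computes
 *   value = 1000 - ((T + 300) - T) = 700
 * i.e. the CPU timer as the guest would see it right now. The retry on
 * (seq & ~1) makes the reader loop while a writer
 * (__start/__stop_cpu_timer_accounting or kvm_s390_set_cpu_timer) is
 * inside its seqcount critical section.
 */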
2225
2226 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2227 {
2228
2229 gmap_enable(vcpu->arch.enabled_gmap);
2230 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2231 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2232 __start_cpu_timer_accounting(vcpu);
2233 vcpu->cpu = cpu;
2234 }
2235
2236 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2237 {
2238 vcpu->cpu = -1;
2239 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2240 __stop_cpu_timer_accounting(vcpu);
2241 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2242 vcpu->arch.enabled_gmap = gmap_get_enabled();
2243 gmap_disable(vcpu->arch.enabled_gmap);
2244
2245 }
2246
2247 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2248 {
2249 /* this equals initial cpu reset in pop, but we don't switch to ESA */
2250 vcpu->arch.sie_block->gpsw.mask = 0UL;
2251 vcpu->arch.sie_block->gpsw.addr = 0UL;
2252 kvm_s390_set_prefix(vcpu, 0);
2253 kvm_s390_set_cpu_timer(vcpu, 0);
2254 vcpu->arch.sie_block->ckc = 0UL;
2255 vcpu->arch.sie_block->todpr = 0;
2256 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2257 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
2258 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2259 /* make sure the new fpc will be lazily loaded */
2260 save_fpu_regs();
2261 current->thread.fpu.fpc = 0;
2262 vcpu->arch.sie_block->gbea = 1;
2263 vcpu->arch.sie_block->pp = 0;
2264 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2265 kvm_clear_async_pf_completion_queue(vcpu);
2266 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2267 kvm_s390_vcpu_stop(vcpu);
2268 kvm_s390_clear_local_irqs(vcpu);
2269 }
2270
2271 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2272 {
2273 mutex_lock(&vcpu->kvm->lock);
2274 preempt_disable();
2275 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2276 preempt_enable();
2277 mutex_unlock(&vcpu->kvm->lock);
2278 if (!kvm_is_ucontrol(vcpu->kvm)) {
2279 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2280 sca_add_vcpu(vcpu);
2281 }
2282 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2283 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2284 /* make vcpu_load load the right gmap on the first trigger */
2285 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2286 }
2287
2288 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2289 {
2290 if (!test_kvm_facility(vcpu->kvm, 76))
2291 return;
2292
2293 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2294
2295 if (vcpu->kvm->arch.crypto.aes_kw)
2296 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2297 if (vcpu->kvm->arch.crypto.dea_kw)
2298 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2299
2300 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2301 }
2302
2303 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2304 {
2305 free_page(vcpu->arch.sie_block->cbrlo);
2306 vcpu->arch.sie_block->cbrlo = 0;
2307 }
2308
2309 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2310 {
2311 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2312 if (!vcpu->arch.sie_block->cbrlo)
2313 return -ENOMEM;
2314
2315 vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2316 return 0;
2317 }
2318
2319 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2320 {
2321 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2322
2323 vcpu->arch.sie_block->ibc = model->ibc;
2324 if (test_kvm_facility(vcpu->kvm, 7))
2325 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2326 }
2327
2328 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2329 {
2330 int rc = 0;
2331
2332 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2333 CPUSTAT_SM |
2334 CPUSTAT_STOPPED);
2335
2336 if (test_kvm_facility(vcpu->kvm, 78))
2337 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2338 else if (test_kvm_facility(vcpu->kvm, 8))
2339 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2340
2341 kvm_s390_vcpu_setup_model(vcpu);
2342
2343 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2344 if (MACHINE_HAS_ESOP)
2345 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2346 if (test_kvm_facility(vcpu->kvm, 9))
2347 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2348 if (test_kvm_facility(vcpu->kvm, 73))
2349 vcpu->arch.sie_block->ecb |= ECB_TE;
2350
2351 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2352 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2353 if (test_kvm_facility(vcpu->kvm, 130))
2354 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2355 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2356 if (sclp.has_cei)
2357 vcpu->arch.sie_block->eca |= ECA_CEI;
2358 if (sclp.has_ib)
2359 vcpu->arch.sie_block->eca |= ECA_IB;
2360 if (sclp.has_siif)
2361 vcpu->arch.sie_block->eca |= ECA_SII;
2362 if (sclp.has_sigpif)
2363 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2364 if (test_kvm_facility(vcpu->kvm, 129)) {
2365 vcpu->arch.sie_block->eca |= ECA_VX;
2366 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2367 }
2368 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2369 | SDNXC;
2370 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2371
2372 if (sclp.has_kss)
2373 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2374 else
2375 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2376
2377 if (vcpu->kvm->arch.use_cmma) {
2378 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2379 if (rc)
2380 return rc;
2381 }
2382 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2383 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2384
2385 kvm_s390_vcpu_crypto_setup(vcpu);
2386
2387 return rc;
2388 }
2389
2390 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2391 unsigned int id)
2392 {
2393 struct kvm_vcpu *vcpu;
2394 struct sie_page *sie_page;
2395 int rc = -EINVAL;
2396
2397 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2398 goto out;
2399
2400 rc = -ENOMEM;
2401
2402 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2403 if (!vcpu)
2404 goto out;
2405
2406 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2407 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2408 if (!sie_page)
2409 goto out_free_cpu;
2410
2411 vcpu->arch.sie_block = &sie_page->sie_block;
2412 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2413
2414 /* the real guest size will always be smaller than msl */
2415 vcpu->arch.sie_block->mso = 0;
2416 vcpu->arch.sie_block->msl = sclp.hamax;
2417
2418 vcpu->arch.sie_block->icpua = id;
2419 spin_lock_init(&vcpu->arch.local_int.lock);
2420 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2421 vcpu->arch.local_int.wq = &vcpu->wq;
2422 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2423 seqcount_init(&vcpu->arch.cputm_seqcount);
2424
2425 rc = kvm_vcpu_init(vcpu, kvm, id);
2426 if (rc)
2427 goto out_free_sie_block;
2428 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2429 vcpu->arch.sie_block);
2430 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2431
2432 return vcpu;
2433 out_free_sie_block:
2434 free_page((unsigned long)(vcpu->arch.sie_block));
2435 out_free_cpu:
2436 kmem_cache_free(kvm_vcpu_cache, vcpu);
2437 out:
2438 return ERR_PTR(rc);
2439 }
2440
2441 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2442 {
2443 return kvm_s390_vcpu_has_irq(vcpu, 0);
2444 }
2445
2446 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2447 {
2448 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2449 exit_sie(vcpu);
2450 }
2451
2452 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2453 {
2454 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2455 }
2456
2457 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2458 {
2459 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2460 exit_sie(vcpu);
2461 }
2462
2463 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2464 {
2465 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2466 }
2467
2468 /*
2469 * Kick a guest cpu out of SIE and wait until SIE is not running.
2470 * If the CPU is not running (e.g. waiting as idle) the function will
2471 * return immediately. */
2472 void exit_sie(struct kvm_vcpu *vcpu)
2473 {
2474 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2475 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2476 cpu_relax();
2477 }
2478
2479 /* Kick a guest cpu out of SIE to process a request synchronously */
2480 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2481 {
2482 kvm_make_request(req, vcpu);
2483 kvm_s390_vcpu_request(vcpu);
2484 }
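
/*
 * Typical usage (see e.g. kvm_gmap_notifier() below): a caller that
 * needs the target VCPU to act on a request before re-entering the
 * guest does roughly
 *   kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
 * which sets the request bit and kicks the VCPU out of SIE via
 * kvm_s390_vcpu_request()/exit_sie(); the request is then handled in
 * kvm_s390_handle_requests() on the next vcpu_pre_run().
 */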
2485
2486 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2487 unsigned long end)
2488 {
2489 struct kvm *kvm = gmap->private;
2490 struct kvm_vcpu *vcpu;
2491 unsigned long prefix;
2492 int i;
2493
2494 if (gmap_is_shadow(gmap))
2495 return;
2496 if (start >= 1UL << 31)
2497 /* We are only interested in prefix pages */
2498 return;
2499 kvm_for_each_vcpu(i, vcpu, kvm) {
2500 /* match against both prefix pages */
2501 prefix = kvm_s390_get_prefix(vcpu);
2502 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2503 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2504 start, end);
2505 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2506 }
2507 }
2508 }
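
/*
 * The check above is a plain interval overlap test between the
 * notified range [start, end] and the two 4K prefix pages
 * [prefix, prefix + 2 * PAGE_SIZE - 1]. For example, with a prefix of
 * 0x8000 any notification touching 0x8000-0x9fff (say start = 0x9000,
 * end = 0x9fff) triggers a KVM_REQ_MMU_RELOAD for that VCPU.
 */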
2509
2510 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2511 {
2512 /* kvm common code refers to this, but never calls it */
2513 BUG();
2514 return 0;
2515 }
2516
2517 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2518 struct kvm_one_reg *reg)
2519 {
2520 int r = -EINVAL;
2521
2522 switch (reg->id) {
2523 case KVM_REG_S390_TODPR:
2524 r = put_user(vcpu->arch.sie_block->todpr,
2525 (u32 __user *)reg->addr);
2526 break;
2527 case KVM_REG_S390_EPOCHDIFF:
2528 r = put_user(vcpu->arch.sie_block->epoch,
2529 (u64 __user *)reg->addr);
2530 break;
2531 case KVM_REG_S390_CPU_TIMER:
2532 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2533 (u64 __user *)reg->addr);
2534 break;
2535 case KVM_REG_S390_CLOCK_COMP:
2536 r = put_user(vcpu->arch.sie_block->ckc,
2537 (u64 __user *)reg->addr);
2538 break;
2539 case KVM_REG_S390_PFTOKEN:
2540 r = put_user(vcpu->arch.pfault_token,
2541 (u64 __user *)reg->addr);
2542 break;
2543 case KVM_REG_S390_PFCOMPARE:
2544 r = put_user(vcpu->arch.pfault_compare,
2545 (u64 __user *)reg->addr);
2546 break;
2547 case KVM_REG_S390_PFSELECT:
2548 r = put_user(vcpu->arch.pfault_select,
2549 (u64 __user *)reg->addr);
2550 break;
2551 case KVM_REG_S390_PP:
2552 r = put_user(vcpu->arch.sie_block->pp,
2553 (u64 __user *)reg->addr);
2554 break;
2555 case KVM_REG_S390_GBEA:
2556 r = put_user(vcpu->arch.sie_block->gbea,
2557 (u64 __user *)reg->addr);
2558 break;
2559 default:
2560 break;
2561 }
2562
2563 return r;
2564 }
2565
2566 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2567 struct kvm_one_reg *reg)
2568 {
2569 int r = -EINVAL;
2570 __u64 val;
2571
2572 switch (reg->id) {
2573 case KVM_REG_S390_TODPR:
2574 r = get_user(vcpu->arch.sie_block->todpr,
2575 (u32 __user *)reg->addr);
2576 break;
2577 case KVM_REG_S390_EPOCHDIFF:
2578 r = get_user(vcpu->arch.sie_block->epoch,
2579 (u64 __user *)reg->addr);
2580 break;
2581 case KVM_REG_S390_CPU_TIMER:
2582 r = get_user(val, (u64 __user *)reg->addr);
2583 if (!r)
2584 kvm_s390_set_cpu_timer(vcpu, val);
2585 break;
2586 case KVM_REG_S390_CLOCK_COMP:
2587 r = get_user(vcpu->arch.sie_block->ckc,
2588 (u64 __user *)reg->addr);
2589 break;
2590 case KVM_REG_S390_PFTOKEN:
2591 r = get_user(vcpu->arch.pfault_token,
2592 (u64 __user *)reg->addr);
2593 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2594 kvm_clear_async_pf_completion_queue(vcpu);
2595 break;
2596 case KVM_REG_S390_PFCOMPARE:
2597 r = get_user(vcpu->arch.pfault_compare,
2598 (u64 __user *)reg->addr);
2599 break;
2600 case KVM_REG_S390_PFSELECT:
2601 r = get_user(vcpu->arch.pfault_select,
2602 (u64 __user *)reg->addr);
2603 break;
2604 case KVM_REG_S390_PP:
2605 r = get_user(vcpu->arch.sie_block->pp,
2606 (u64 __user *)reg->addr);
2607 break;
2608 case KVM_REG_S390_GBEA:
2609 r = get_user(vcpu->arch.sie_block->gbea,
2610 (u64 __user *)reg->addr);
2611 break;
2612 default:
2613 break;
2614 }
2615
2616 return r;
2617 }
2618
2619 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2620 {
2621 kvm_s390_vcpu_initial_reset(vcpu);
2622 return 0;
2623 }
2624
2625 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2626 {
2627 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2628 return 0;
2629 }
2630
2631 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2632 {
2633 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2634 return 0;
2635 }
2636
2637 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2638 struct kvm_sregs *sregs)
2639 {
2640 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2641 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2642 return 0;
2643 }
2644
2645 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2646 struct kvm_sregs *sregs)
2647 {
2648 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2649 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2650 return 0;
2651 }
2652
2653 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2654 {
2655 if (test_fp_ctl(fpu->fpc))
2656 return -EINVAL;
2657 vcpu->run->s.regs.fpc = fpu->fpc;
2658 if (MACHINE_HAS_VX)
2659 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2660 (freg_t *) fpu->fprs);
2661 else
2662 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2663 return 0;
2664 }
2665
2666 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2667 {
2668 /* make sure we have the latest values */
2669 save_fpu_regs();
2670 if (MACHINE_HAS_VX)
2671 convert_vx_to_fp((freg_t *) fpu->fprs,
2672 (__vector128 *) vcpu->run->s.regs.vrs);
2673 else
2674 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2675 fpu->fpc = vcpu->run->s.regs.fpc;
2676 return 0;
2677 }
2678
2679 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2680 {
2681 int rc = 0;
2682
2683 if (!is_vcpu_stopped(vcpu))
2684 rc = -EBUSY;
2685 else {
2686 vcpu->run->psw_mask = psw.mask;
2687 vcpu->run->psw_addr = psw.addr;
2688 }
2689 return rc;
2690 }
2691
2692 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2693 struct kvm_translation *tr)
2694 {
2695 return -EINVAL; /* not implemented yet */
2696 }
2697
2698 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2699 KVM_GUESTDBG_USE_HW_BP | \
2700 KVM_GUESTDBG_ENABLE)
2701
2702 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2703 struct kvm_guest_debug *dbg)
2704 {
2705 int rc = 0;
2706
2707 vcpu->guest_debug = 0;
2708 kvm_s390_clear_bp_data(vcpu);
2709
2710 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2711 return -EINVAL;
2712 if (!sclp.has_gpere)
2713 return -EINVAL;
2714
2715 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2716 vcpu->guest_debug = dbg->control;
2717 /* enforce guest PER */
2718 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2719
2720 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2721 rc = kvm_s390_import_bp_data(vcpu, dbg);
2722 } else {
2723 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2724 vcpu->arch.guestdbg.last_bp = 0;
2725 }
2726
2727 if (rc) {
2728 vcpu->guest_debug = 0;
2729 kvm_s390_clear_bp_data(vcpu);
2730 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2731 }
2732
2733 return rc;
2734 }
2735
2736 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2737 struct kvm_mp_state *mp_state)
2738 {
2739 /* CHECK_STOP and LOAD are not supported yet */
2740 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2741 KVM_MP_STATE_OPERATING;
2742 }
2743
2744 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2745 struct kvm_mp_state *mp_state)
2746 {
2747 int rc = 0;
2748
2749 /* user space knows about this interface - let it control the state */
2750 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2751
2752 switch (mp_state->mp_state) {
2753 case KVM_MP_STATE_STOPPED:
2754 kvm_s390_vcpu_stop(vcpu);
2755 break;
2756 case KVM_MP_STATE_OPERATING:
2757 kvm_s390_vcpu_start(vcpu);
2758 break;
2759 case KVM_MP_STATE_LOAD:
2760 case KVM_MP_STATE_CHECK_STOP:
2761 /* fall through - CHECK_STOP and LOAD are not supported yet */
2762 default:
2763 rc = -ENXIO;
2764 }
2765
2766 return rc;
2767 }
2768
2769 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2770 {
2771 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2772 }
2773
2774 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2775 {
2776 retry:
2777 kvm_s390_vcpu_request_handled(vcpu);
2778 if (!kvm_request_pending(vcpu))
2779 return 0;
2780 /*
2781 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2782 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2783 * This ensures that the ipte instruction for this request has
2784 * already finished. We might race against a second unmapper that
2785 * wants to set the blocking bit. Let's just retry the request loop.
2786 */
2787 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2788 int rc;
2789 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2790 kvm_s390_get_prefix(vcpu),
2791 PAGE_SIZE * 2, PROT_WRITE);
2792 if (rc) {
2793 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2794 return rc;
2795 }
2796 goto retry;
2797 }
2798
2799 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2800 vcpu->arch.sie_block->ihcpu = 0xffff;
2801 goto retry;
2802 }
2803
2804 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2805 if (!ibs_enabled(vcpu)) {
2806 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2807 atomic_or(CPUSTAT_IBS,
2808 &vcpu->arch.sie_block->cpuflags);
2809 }
2810 goto retry;
2811 }
2812
2813 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2814 if (ibs_enabled(vcpu)) {
2815 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2816 atomic_andnot(CPUSTAT_IBS,
2817 &vcpu->arch.sie_block->cpuflags);
2818 }
2819 goto retry;
2820 }
2821
2822 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2823 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2824 goto retry;
2825 }
2826
2827 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2828 /*
2829 * Disable CMMA virtualization; we will emulate the ESSA
2830 * instruction manually, in order to provide additional
2831 * functionalities needed for live migration.
2832 */
2833 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2834 goto retry;
2835 }
2836
2837 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2838 /*
2839 * Re-enable CMMA virtualization if CMMA is available and
2840 * was used.
2841 */
2842 if ((vcpu->kvm->arch.use_cmma) &&
2843 (vcpu->kvm->mm->context.use_cmma))
2844 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2845 goto retry;
2846 }
2847
2848 /* nothing to do, just clear the request */
2849 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2850
2851 return 0;
2852 }
2853
2854 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2855 {
2856 struct kvm_vcpu *vcpu;
2857 int i;
2858
2859 mutex_lock(&kvm->lock);
2860 preempt_disable();
2861 kvm->arch.epoch = tod - get_tod_clock();
2862 kvm_s390_vcpu_block_all(kvm);
2863 kvm_for_each_vcpu(i, vcpu, kvm)
2864 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2865 kvm_s390_vcpu_unblock_all(kvm);
2866 preempt_enable();
2867 mutex_unlock(&kvm->lock);
2868 }
2869
2870 /**
2871 * kvm_arch_fault_in_page - fault-in guest page if necessary
2872 * @vcpu: The corresponding virtual cpu
2873 * @gpa: Guest physical address
2874 * @writable: Whether the page should be writable or not
2875 *
2876 * Make sure that a guest page has been faulted-in on the host.
2877 *
2878 * Return: Zero on success, negative error code otherwise.
2879 */
2880 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2881 {
2882 return gmap_fault(vcpu->arch.gmap, gpa,
2883 writable ? FAULT_FLAG_WRITE : 0);
2884 }
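
/*
 * For illustration, the post-run fault path in vcpu_post_run() below
 * resolves a guest fault that SIE could not handle itself with
 *   rc = kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
 * i.e. a writable fault-in at the address recorded by the gmap code.
 */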
2885
2886 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2887 unsigned long token)
2888 {
2889 struct kvm_s390_interrupt inti;
2890 struct kvm_s390_irq irq;
2891
2892 if (start_token) {
2893 irq.u.ext.ext_params2 = token;
2894 irq.type = KVM_S390_INT_PFAULT_INIT;
2895 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2896 } else {
2897 inti.type = KVM_S390_INT_PFAULT_DONE;
2898 inti.parm64 = token;
2899 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2900 }
2901 }
2902
2903 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2904 struct kvm_async_pf *work)
2905 {
2906 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2907 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2908 }
2909
2910 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2911 struct kvm_async_pf *work)
2912 {
2913 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2914 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2915 }
2916
2917 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2918 struct kvm_async_pf *work)
2919 {
2920 /* s390 will always inject the page directly */
2921 }
2922
2923 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2924 {
2925 /*
2926 * s390 will always inject the page directly,
2927 * but we still want check_async_completion to clean up
2928 */
2929 return true;
2930 }
2931
2932 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2933 {
2934 hva_t hva;
2935 struct kvm_arch_async_pf arch;
2936 int rc;
2937
2938 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2939 return 0;
2940 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2941 vcpu->arch.pfault_compare)
2942 return 0;
2943 if (psw_extint_disabled(vcpu))
2944 return 0;
2945 if (kvm_s390_vcpu_has_irq(vcpu, 0))
2946 return 0;
2947 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2948 return 0;
2949 if (!vcpu->arch.gmap->pfault_enabled)
2950 return 0;
2951
2952 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2953 hva += current->thread.gmap_addr & ~PAGE_MASK;
2954 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2955 return 0;
2956
2957 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2958 return rc;
2959 }
2960
2961 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2962 {
2963 int rc, cpuflags;
2964
2965 /*
2966 * On s390 notifications for arriving pages will be delivered directly
2967 * to the guest but the housekeeping for completed pfaults is
2968 * handled outside the worker.
2969 */
2970 kvm_check_async_pf_completion(vcpu);
2971
2972 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2973 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2974
2975 if (need_resched())
2976 schedule();
2977
2978 if (test_cpu_flag(CIF_MCCK_PENDING))
2979 s390_handle_mcck();
2980
2981 if (!kvm_is_ucontrol(vcpu->kvm)) {
2982 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2983 if (rc)
2984 return rc;
2985 }
2986
2987 rc = kvm_s390_handle_requests(vcpu);
2988 if (rc)
2989 return rc;
2990
2991 if (guestdbg_enabled(vcpu)) {
2992 kvm_s390_backup_guest_per_regs(vcpu);
2993 kvm_s390_patch_guest_per_regs(vcpu);
2994 }
2995
2996 vcpu->arch.sie_block->icptcode = 0;
2997 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2998 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2999 trace_kvm_s390_sie_enter(vcpu, cpuflags);
3000
3001 return 0;
3002 }
3003
3004 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3005 {
3006 struct kvm_s390_pgm_info pgm_info = {
3007 .code = PGM_ADDRESSING,
3008 };
3009 u8 opcode, ilen;
3010 int rc;
3011
3012 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3013 trace_kvm_s390_sie_fault(vcpu);
3014
3015 /*
3016 * We want to inject an addressing exception, which is defined as a
3017 * suppressing or terminating exception. However, since we came here
3018 * by a DAT access exception, the PSW still points to the faulting
3019 * instruction since DAT exceptions are nullifying. So we've got
3020 * to look up the current opcode to get the length of the instruction
3021 * to be able to forward the PSW.
3022 */
3023 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3024 ilen = insn_length(opcode);
3025 if (rc < 0) {
3026 return rc;
3027 } else if (rc) {
3028 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3029 * Forward by arbitrary ilc, injection will take care of
3030 * nullification if necessary.
3031 */
3032 pgm_info = vcpu->arch.pgm;
3033 ilen = 4;
3034 }
3035 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3036 kvm_s390_forward_psw(vcpu, ilen);
3037 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3038 }
3039
3040 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3041 {
3042 struct mcck_volatile_info *mcck_info;
3043 struct sie_page *sie_page;
3044
3045 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3046 vcpu->arch.sie_block->icptcode);
3047 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3048
3049 if (guestdbg_enabled(vcpu))
3050 kvm_s390_restore_guest_per_regs(vcpu);
3051
3052 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3053 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3054
3055 if (exit_reason == -EINTR) {
3056 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3057 sie_page = container_of(vcpu->arch.sie_block,
3058 struct sie_page, sie_block);
3059 mcck_info = &sie_page->mcck_info;
3060 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3061 return 0;
3062 }
3063
3064 if (vcpu->arch.sie_block->icptcode > 0) {
3065 int rc = kvm_handle_sie_intercept(vcpu);
3066
3067 if (rc != -EOPNOTSUPP)
3068 return rc;
3069 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3070 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3071 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3072 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3073 return -EREMOTE;
3074 } else if (exit_reason != -EFAULT) {
3075 vcpu->stat.exit_null++;
3076 return 0;
3077 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3078 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3079 vcpu->run->s390_ucontrol.trans_exc_code =
3080 current->thread.gmap_addr;
3081 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3082 return -EREMOTE;
3083 } else if (current->thread.gmap_pfault) {
3084 trace_kvm_s390_major_guest_pfault(vcpu);
3085 current->thread.gmap_pfault = 0;
3086 if (kvm_arch_setup_async_pf(vcpu))
3087 return 0;
3088 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3089 }
3090 return vcpu_post_run_fault_in_sie(vcpu);
3091 }
3092
3093 static int __vcpu_run(struct kvm_vcpu *vcpu)
3094 {
3095 int rc, exit_reason;
3096
3097 /*
3098 * We try to hold kvm->srcu during most of vcpu_run (except when
3099 * running the guest), so that memslots (and other stuff) are protected
3100 */
3101 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3102
3103 do {
3104 rc = vcpu_pre_run(vcpu);
3105 if (rc)
3106 break;
3107
3108 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3109 /*
3110 * As PF_VCPU will be used in the fault handler, there must be
3111 * no uaccess between guest_enter and guest_exit.
3112 */
3113 local_irq_disable();
3114 guest_enter_irqoff();
3115 __disable_cpu_timer_accounting(vcpu);
3116 local_irq_enable();
3117 exit_reason = sie64a(vcpu->arch.sie_block,
3118 vcpu->run->s.regs.gprs);
3119 local_irq_disable();
3120 __enable_cpu_timer_accounting(vcpu);
3121 guest_exit_irqoff();
3122 local_irq_enable();
3123 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3124
3125 rc = vcpu_post_run(vcpu, exit_reason);
3126 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3127
3128 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3129 return rc;
3130 }
3131
3132 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3133 {
3134 struct runtime_instr_cb *riccb;
3135 struct gs_cb *gscb;
3136
3137 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3138 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3139 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3140 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3141 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3142 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3143 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3144 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3145 /* some control register changes require a tlb flush */
3146 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3147 }
3148 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3149 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3150 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3151 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3152 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3153 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3154 }
3155 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3156 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3157 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3158 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3159 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3160 kvm_clear_async_pf_completion_queue(vcpu);
3161 }
3162 /*
3163 * If userspace sets the riccb (e.g. after migration) to a valid state,
3164 * we should enable RI here instead of doing the lazy enablement.
3165 */
3166 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3167 test_kvm_facility(vcpu->kvm, 64) &&
3168 riccb->valid &&
3169 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3170 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3171 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3172 }
3173 /*
3174 * If userspace sets the gscb (e.g. after migration) to non-zero,
3175 * we should enable GS here instead of doing the lazy enablement.
3176 */
3177 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3178 test_kvm_facility(vcpu->kvm, 133) &&
3179 gscb->gssm &&
3180 !vcpu->arch.gs_enabled) {
3181 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3182 vcpu->arch.sie_block->ecb |= ECB_GS;
3183 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3184 vcpu->arch.gs_enabled = 1;
3185 }
3186 save_access_regs(vcpu->arch.host_acrs);
3187 restore_access_regs(vcpu->run->s.regs.acrs);
3188 /* save host (userspace) fprs/vrs */
3189 save_fpu_regs();
3190 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3191 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3192 if (MACHINE_HAS_VX)
3193 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3194 else
3195 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3196 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3197 if (test_fp_ctl(current->thread.fpu.fpc))
3198 /* User space provided an invalid FPC, let's clear it */
3199 current->thread.fpu.fpc = 0;
3200 if (MACHINE_HAS_GS) {
3201 preempt_disable();
3202 __ctl_set_bit(2, 4);
3203 if (current->thread.gs_cb) {
3204 vcpu->arch.host_gscb = current->thread.gs_cb;
3205 save_gs_cb(vcpu->arch.host_gscb);
3206 }
3207 if (vcpu->arch.gs_enabled) {
3208 current->thread.gs_cb = (struct gs_cb *)
3209 &vcpu->run->s.regs.gscb;
3210 restore_gs_cb(current->thread.gs_cb);
3211 }
3212 preempt_enable();
3213 }
3214
3215 kvm_run->kvm_dirty_regs = 0;
3216 }
3217
3218 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3219 {
3220 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3221 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3222 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3223 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3224 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3225 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3226 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3227 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3228 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3229 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3230 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3231 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3232 save_access_regs(vcpu->run->s.regs.acrs);
3233 restore_access_regs(vcpu->arch.host_acrs);
3234 /* Save guest register state */
3235 save_fpu_regs();
3236 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3237 /* Restore will be done lazily at return */
3238 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3239 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3240 if (MACHINE_HAS_GS) {
3241 __ctl_set_bit(2, 4);
3242 if (vcpu->arch.gs_enabled)
3243 save_gs_cb(current->thread.gs_cb);
3244 preempt_disable();
3245 current->thread.gs_cb = vcpu->arch.host_gscb;
3246 restore_gs_cb(vcpu->arch.host_gscb);
3247 preempt_enable();
3248 if (!vcpu->arch.host_gscb)
3249 __ctl_clear_bit(2, 4);
3250 vcpu->arch.host_gscb = NULL;
3251 }
3252
3253 }
3254
3255 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3256 {
3257 int rc;
3258 sigset_t sigsaved;
3259
3260 if (kvm_run->immediate_exit)
3261 return -EINTR;
3262
3263 if (guestdbg_exit_pending(vcpu)) {
3264 kvm_s390_prepare_debug_exit(vcpu);
3265 return 0;
3266 }
3267
3268 if (vcpu->sigset_active)
3269 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
3270
3271 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3272 kvm_s390_vcpu_start(vcpu);
3273 } else if (is_vcpu_stopped(vcpu)) {
3274 pr_err_ratelimited("can't run stopped vcpu %d\n",
3275 vcpu->vcpu_id);
3276 return -EINVAL;
3277 }
3278
3279 sync_regs(vcpu, kvm_run);
3280 enable_cpu_timer_accounting(vcpu);
3281
3282 might_fault();
3283 rc = __vcpu_run(vcpu);
3284
3285 if (signal_pending(current) && !rc) {
3286 kvm_run->exit_reason = KVM_EXIT_INTR;
3287 rc = -EINTR;
3288 }
3289
3290 if (guestdbg_exit_pending(vcpu) && !rc) {
3291 kvm_s390_prepare_debug_exit(vcpu);
3292 rc = 0;
3293 }
3294
3295 if (rc == -EREMOTE) {
3296 /* userspace support is needed, kvm_run has been prepared */
3297 rc = 0;
3298 }
3299
3300 disable_cpu_timer_accounting(vcpu);
3301 store_regs(vcpu, kvm_run);
3302
3303 if (vcpu->sigset_active)
3304 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
3305
3306 vcpu->stat.exit_userspace++;
3307 return rc;
3308 }
3309
3310 /*
3311 * store status at address
3312 * we have two special cases:
3313 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3314 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3315 */
3316 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3317 {
3318 unsigned char archmode = 1;
3319 freg_t fprs[NUM_FPRS];
3320 unsigned int px;
3321 u64 clkcomp, cputm;
3322 int rc;
3323
3324 px = kvm_s390_get_prefix(vcpu);
3325 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3326 if (write_guest_abs(vcpu, 163, &archmode, 1))
3327 return -EFAULT;
3328 gpa = 0;
3329 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3330 if (write_guest_real(vcpu, 163, &archmode, 1))
3331 return -EFAULT;
3332 gpa = px;
3333 } else
3334 gpa -= __LC_FPREGS_SAVE_AREA;
3335
3336 /* manually convert vector registers if necessary */
3337 if (MACHINE_HAS_VX) {
3338 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3339 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3340 fprs, 128);
3341 } else {
3342 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3343 vcpu->run->s.regs.fprs, 128);
3344 }
3345 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3346 vcpu->run->s.regs.gprs, 128);
3347 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3348 &vcpu->arch.sie_block->gpsw, 16);
3349 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3350 &px, 4);
3351 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3352 &vcpu->run->s.regs.fpc, 4);
3353 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3354 &vcpu->arch.sie_block->todpr, 4);
3355 cputm = kvm_s390_get_cpu_timer(vcpu);
3356 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3357 &cputm, 8);
3358 clkcomp = vcpu->arch.sie_block->ckc >> 8;
3359 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3360 &clkcomp, 8);
3361 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3362 &vcpu->run->s.regs.acrs, 64);
3363 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3364 &vcpu->arch.sie_block->gcr, 128);
3365 return rc ? -EFAULT : 0;
3366 }
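
/*
 * Example: kvm_s390_vcpu_store_status() below (reached via the
 * KVM_S390_STORE_STATUS ioctl) may pass one of the two magic values
 * documented above:
 *   kvm_s390_store_status_unloaded(vcpu, KVM_S390_STORE_STATUS_NOADDR);
 * stores into the save area starting at absolute address 0x1200, while
 * KVM_S390_STORE_STATUS_PREFIXED stores relative to the VCPU's current
 * prefix; any other gpa is taken as the absolute address at which the
 * register save area starts.
 */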
3367
3368 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3369 {
3370 /*
3371 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3372 * switch in the run ioctl. Let's update our copies before we store
3373 * them into the save area.
3374 */
3375 save_fpu_regs();
3376 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3377 save_access_regs(vcpu->run->s.regs.acrs);
3378
3379 return kvm_s390_store_status_unloaded(vcpu, addr);
3380 }
3381
3382 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3383 {
3384 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3385 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3386 }
3387
3388 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3389 {
3390 unsigned int i;
3391 struct kvm_vcpu *vcpu;
3392
3393 kvm_for_each_vcpu(i, vcpu, kvm) {
3394 __disable_ibs_on_vcpu(vcpu);
3395 }
3396 }
3397
3398 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3399 {
3400 if (!sclp.has_ibs)
3401 return;
3402 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3403 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3404 }
3405
3406 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3407 {
3408 int i, online_vcpus, started_vcpus = 0;
3409
3410 if (!is_vcpu_stopped(vcpu))
3411 return;
3412
3413 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3414 /* Only one cpu at a time may enter/leave the STOPPED state. */
3415 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3416 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3417
3418 for (i = 0; i < online_vcpus; i++) {
3419 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3420 started_vcpus++;
3421 }
3422
3423 if (started_vcpus == 0) {
3424 /* we're the only active VCPU -> speed it up */
3425 __enable_ibs_on_vcpu(vcpu);
3426 } else if (started_vcpus == 1) {
3427 /*
3428 * As we are starting a second VCPU, we have to disable
3429 * the IBS facility on all VCPUs to remove potentially
3430 * outstanding ENABLE requests.
3431 */
3432 __disable_ibs_on_all_vcpus(vcpu->kvm);
3433 }
3434
3435 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3436 /*
3437 * Another VCPU might have used IBS while we were offline.
3438 * Let's play safe and flush the VCPU at startup.
3439 */
3440 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3441 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3442 return;
3443 }
3444
3445 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3446 {
3447 int i, online_vcpus, started_vcpus = 0;
3448 struct kvm_vcpu *started_vcpu = NULL;
3449
3450 if (is_vcpu_stopped(vcpu))
3451 return;
3452
3453 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3454 /* Only one cpu at a time may enter/leave the STOPPED state. */
3455 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3456 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3457
3458 /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3459 kvm_s390_clear_stop_irq(vcpu);
3460
3461 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3462 __disable_ibs_on_vcpu(vcpu);
3463
3464 for (i = 0; i < online_vcpus; i++) {
3465 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3466 started_vcpus++;
3467 started_vcpu = vcpu->kvm->vcpus[i];
3468 }
3469 }
3470
3471 if (started_vcpus == 1) {
3472 /*
3473 * As we only have one VCPU left, we want to enable the
3474 * IBS facility for that VCPU to speed it up.
3475 */
3476 __enable_ibs_on_vcpu(started_vcpu);
3477 }
3478
3479 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3480 return;
3481 }
3482
3483 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3484 struct kvm_enable_cap *cap)
3485 {
3486 int r;
3487
3488 if (cap->flags)
3489 return -EINVAL;
3490
3491 switch (cap->cap) {
3492 case KVM_CAP_S390_CSS_SUPPORT:
3493 if (!vcpu->kvm->arch.css_support) {
3494 vcpu->kvm->arch.css_support = 1;
3495 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3496 trace_kvm_s390_enable_css(vcpu->kvm);
3497 }
3498 r = 0;
3499 break;
3500 default:
3501 r = -EINVAL;
3502 break;
3503 }
3504 return r;
3505 }
3506
3507 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3508 struct kvm_s390_mem_op *mop)
3509 {
3510 void __user *uaddr = (void __user *)mop->buf;
3511 void *tmpbuf = NULL;
3512 int r, srcu_idx;
3513 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3514 | KVM_S390_MEMOP_F_CHECK_ONLY;
3515
3516 if (mop->flags & ~supported_flags)
3517 return -EINVAL;
3518
3519 if (mop->size > MEM_OP_MAX_SIZE)
3520 return -E2BIG;
3521
3522 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3523 tmpbuf = vmalloc(mop->size);
3524 if (!tmpbuf)
3525 return -ENOMEM;
3526 }
3527
3528 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3529
3530 switch (mop->op) {
3531 case KVM_S390_MEMOP_LOGICAL_READ:
3532 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3533 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3534 mop->size, GACC_FETCH);
3535 break;
3536 }
3537 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3538 if (r == 0) {
3539 if (copy_to_user(uaddr, tmpbuf, mop->size))
3540 r = -EFAULT;
3541 }
3542 break;
3543 case KVM_S390_MEMOP_LOGICAL_WRITE:
3544 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3545 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3546 mop->size, GACC_STORE);
3547 break;
3548 }
3549 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3550 r = -EFAULT;
3551 break;
3552 }
3553 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3554 break;
3555 default:
3556 r = -EINVAL;
3557 }
3558
3559 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3560
3561 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3562 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3563
3564 vfree(tmpbuf);
3565 return r;
3566 }
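
/*
 * A minimal sketch of the userspace side (field names as used above;
 * guest_addr and user_buffer are placeholders): to read 256 bytes of
 * guest logical memory one would fill
 *   struct kvm_s390_mem_op mop = {
 *           .gaddr = guest_addr,
 *           .size  = 256,
 *           .op    = KVM_S390_MEMOP_LOGICAL_READ,
 *           .buf   = (__u64) user_buffer,
 *           .ar    = 0,
 *   };
 * and issue ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop). Adding
 * KVM_S390_MEMOP_F_CHECK_ONLY to .flags skips the copy and merely
 * checks that the range is accessible.
 */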
3567
3568 long kvm_arch_vcpu_ioctl(struct file *filp,
3569 unsigned int ioctl, unsigned long arg)
3570 {
3571 struct kvm_vcpu *vcpu = filp->private_data;
3572 void __user *argp = (void __user *)arg;
3573 int idx;
3574 long r;
3575
3576 switch (ioctl) {
3577 case KVM_S390_IRQ: {
3578 struct kvm_s390_irq s390irq;
3579
3580 r = -EFAULT;
3581 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3582 break;
3583 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3584 break;
3585 }
3586 case KVM_S390_INTERRUPT: {
3587 struct kvm_s390_interrupt s390int;
3588 struct kvm_s390_irq s390irq;
3589
3590 r = -EFAULT;
3591 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3592 break;
3593 if (s390int_to_s390irq(&s390int, &s390irq))
3594 return -EINVAL;
3595 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3596 break;
3597 }
3598 case KVM_S390_STORE_STATUS:
3599 idx = srcu_read_lock(&vcpu->kvm->srcu);
3600 r = kvm_s390_vcpu_store_status(vcpu, arg);
3601 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3602 break;
3603 case KVM_S390_SET_INITIAL_PSW: {
3604 psw_t psw;
3605
3606 r = -EFAULT;
3607 if (copy_from_user(&psw, argp, sizeof(psw)))
3608 break;
3609 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3610 break;
3611 }
3612 case KVM_S390_INITIAL_RESET:
3613 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3614 break;
3615 case KVM_SET_ONE_REG:
3616 case KVM_GET_ONE_REG: {
3617 struct kvm_one_reg reg;
3618 r = -EFAULT;
3619 if (copy_from_user(&reg, argp, sizeof(reg)))
3620 break;
3621 if (ioctl == KVM_SET_ONE_REG)
3622 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3623 else
3624 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3625 break;
3626 }
3627 #ifdef CONFIG_KVM_S390_UCONTROL
3628 case KVM_S390_UCAS_MAP: {
3629 struct kvm_s390_ucas_mapping ucasmap;
3630
3631 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3632 r = -EFAULT;
3633 break;
3634 }
3635
3636 if (!kvm_is_ucontrol(vcpu->kvm)) {
3637 r = -EINVAL;
3638 break;
3639 }
3640
3641 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3642 ucasmap.vcpu_addr, ucasmap.length);
3643 break;
3644 }
3645 case KVM_S390_UCAS_UNMAP: {
3646 struct kvm_s390_ucas_mapping ucasmap;
3647
3648 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3649 r = -EFAULT;
3650 break;
3651 }
3652
3653 if (!kvm_is_ucontrol(vcpu->kvm)) {
3654 r = -EINVAL;
3655 break;
3656 }
3657
3658 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3659 ucasmap.length);
3660 break;
3661 }
3662 #endif
3663 case KVM_S390_VCPU_FAULT: {
3664 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3665 break;
3666 }
3667 case KVM_ENABLE_CAP:
3668 {
3669 struct kvm_enable_cap cap;
3670 r = -EFAULT;
3671 if (copy_from_user(&cap, argp, sizeof(cap)))
3672 break;
3673 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3674 break;
3675 }
3676 case KVM_S390_MEM_OP: {
3677 struct kvm_s390_mem_op mem_op;
3678
3679 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3680 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3681 else
3682 r = -EFAULT;
3683 break;
3684 }
3685 case KVM_S390_SET_IRQ_STATE: {
3686 struct kvm_s390_irq_state irq_state;
3687
3688 r = -EFAULT;
3689 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3690 break;
3691 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3692 irq_state.len == 0 ||
3693 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3694 r = -EINVAL;
3695 break;
3696 }
3697 r = kvm_s390_set_irq_state(vcpu,
3698 (void __user *) irq_state.buf,
3699 irq_state.len);
3700 break;
3701 }
3702 case KVM_S390_GET_IRQ_STATE: {
3703 struct kvm_s390_irq_state irq_state;
3704
3705 r = -EFAULT;
3706 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3707 break;
3708 if (irq_state.len == 0) {
3709 r = -EINVAL;
3710 break;
3711 }
3712 r = kvm_s390_get_irq_state(vcpu,
3713 (__u8 __user *) irq_state.buf,
3714 irq_state.len);
3715 break;
3716 }
3717 default:
3718 r = -ENOTTY;
3719 }
3720 return r;
3721 }
3722
3723 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3724 {
3725 #ifdef CONFIG_KVM_S390_UCONTROL
3726 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3727 && (kvm_is_ucontrol(vcpu->kvm))) {
3728 vmf->page = virt_to_page(vcpu->arch.sie_block);
3729 get_page(vmf->page);
3730 return 0;
3731 }
3732 #endif
3733 return VM_FAULT_SIGBUS;
3734 }
3735
3736 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3737 unsigned long npages)
3738 {
3739 return 0;
3740 }
3741
3742 /* Section: memory related */
3743 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3744 struct kvm_memory_slot *memslot,
3745 const struct kvm_userspace_memory_region *mem,
3746 enum kvm_mr_change change)
3747 {
3748 /* A few sanity checks. Memory slots have to start and end on a segment
3749 boundary (1MB). The memory in userland may be fragmented into various
3750 different vmas. It is okay to mmap() and munmap() parts of this slot
3751 at any time after this call. */
3752
3753 if (mem->userspace_addr & 0xffffful)
3754 return -EINVAL;
3755
3756 if (mem->memory_size & 0xffffful)
3757 return -EINVAL;
3758
3759 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3760 return -EINVAL;
3761
3762 return 0;
3763 }
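
/*
 * The 0xfffff masks above enforce 1MB (segment) alignment: a slot with
 * userspace_addr = 0x40000000 and memory_size = 0x00100000 passes, a
 * memory_size of 0x00080000 (512KB) is rejected with -EINVAL, as is
 * any slot that would extend beyond kvm->arch.mem_limit.
 */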
3764
3765 void kvm_arch_commit_memory_region(struct kvm *kvm,
3766 const struct kvm_userspace_memory_region *mem,
3767 const struct kvm_memory_slot *old,
3768 const struct kvm_memory_slot *new,
3769 enum kvm_mr_change change)
3770 {
3771 int rc;
3772
3773 /* If the basics of the memslot do not change, we do not want
3774 * to update the gmap. Every update causes several unnecessary
3775 * segment translation exceptions. This is usually handled just
3776 * fine by the normal fault handler + gmap, but it will also
3777 * cause faults on the prefix page of running guest CPUs.
3778 */
3779 if (old->userspace_addr == mem->userspace_addr &&
3780 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3781 old->npages * PAGE_SIZE == mem->memory_size)
3782 return;
3783
3784 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3785 mem->guest_phys_addr, mem->memory_size);
3786 if (rc)
3787 pr_warn("failed to commit memory region\n");
3788 return;
3789 }
3790
3791 static inline unsigned long nonhyp_mask(int i)
3792 {
3793 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3794
3795 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3796 }
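
/*
 * Worked example of the arithmetic above (assuming hmfai is a 32-bit
 * field): nonhyp_fai is the i-th 2-bit field of sclp.hmfai, counted
 * from the most significant end. A field value of 0 yields the full
 * mask 0x0000ffffffffffff, 1 yields 0x00000000ffffffff, 2 yields
 * 0x000000000000ffff and 3 yields 0, so each increment hides another
 * 16 facility bits from the mask applied in kvm_s390_init() below.
 */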
3797
3798 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3799 {
3800 vcpu->valid_wakeup = false;
3801 }
3802
3803 static int __init kvm_s390_init(void)
3804 {
3805 int i;
3806
3807 if (!sclp.has_sief2) {
3808 pr_info("SIE not available\n");
3809 return -ENODEV;
3810 }
3811
3812 for (i = 0; i < 16; i++)
3813 kvm_s390_fac_list_mask[i] |=
3814 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3815
3816 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3817 }
3818
3819 static void __exit kvm_s390_exit(void)
3820 {
3821 kvm_exit();
3822 }
3823
3824 module_init(kvm_s390_init);
3825 module_exit(kvm_s390_exit);
3826
3827 /*
3828 * Enable autoloading of the kvm module.
3829 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3830 * since x86 takes a different approach.
3831 */
3832 #include <linux/miscdevice.h>
3833 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3834 MODULE_ALIAS("devname:kvm");