]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blob - arch/s390/kvm/kvm-s390.c
KVM: s390: do not block CPU on dirty logging
[mirror_ubuntu-jammy-kernel.git] / arch / s390 / kvm / kvm-s390.c
1 /*
2 * hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008, 2009
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
15 */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <asm/asm-offsets.h>
30 #include <asm/lowcore.h>
31 #include <asm/etr.h>
32 #include <asm/pgtable.h>
33 #include <asm/nmi.h>
34 #include <asm/switch_to.h>
35 #include <asm/isc.h>
36 #include <asm/sclp.h>
37 #include "kvm-s390.h"
38 #include "gaccess.h"
39
40 #define KMSG_COMPONENT "kvm-s390"
41 #undef pr_fmt
42 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
43
44 #define CREATE_TRACE_POINTS
45 #include "trace.h"
46 #include "trace-s390.h"
47
48 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
49 #define LOCAL_IRQS 32
50 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
51 (KVM_MAX_VCPUS + LOCAL_IRQS))
52
53 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
54
55 struct kvm_stats_debugfs_item debugfs_entries[] = {
56 { "userspace_handled", VCPU_STAT(exit_userspace) },
57 { "exit_null", VCPU_STAT(exit_null) },
58 { "exit_validity", VCPU_STAT(exit_validity) },
59 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
60 { "exit_external_request", VCPU_STAT(exit_external_request) },
61 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
62 { "exit_instruction", VCPU_STAT(exit_instruction) },
63 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
64 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
65 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
66 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
67 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
68 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
69 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
70 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
71 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
72 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
73 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
74 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
75 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
76 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
77 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
78 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
79 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
80 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
81 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
82 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
83 { "instruction_spx", VCPU_STAT(instruction_spx) },
84 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
85 { "instruction_stap", VCPU_STAT(instruction_stap) },
86 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
87 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
88 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
89 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
90 { "instruction_essa", VCPU_STAT(instruction_essa) },
91 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
92 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
93 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
94 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
95 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
96 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
97 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
98 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
99 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
100 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
101 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
102 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
103 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
104 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
105 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
106 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
107 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
108 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
109 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
110 { "diagnose_10", VCPU_STAT(diagnose_10) },
111 { "diagnose_44", VCPU_STAT(diagnose_44) },
112 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
113 { "diagnose_258", VCPU_STAT(diagnose_258) },
114 { "diagnose_308", VCPU_STAT(diagnose_308) },
115 { "diagnose_500", VCPU_STAT(diagnose_500) },
116 { NULL }
117 };
118
119 /* upper facilities limit for kvm */
120 unsigned long kvm_s390_fac_list_mask[] = {
121 0xffe6fffbfcfdfc40UL,
122 0x005e800000000000UL,
123 };
124
125 unsigned long kvm_s390_fac_list_mask_size(void)
126 {
127 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
128 return ARRAY_SIZE(kvm_s390_fac_list_mask);
129 }
130
131 static struct gmap_notifier gmap_notifier;
132 debug_info_t *kvm_s390_dbf;
133
134 /* Section: not file related */
135 int kvm_arch_hardware_enable(void)
136 {
137 /* every s390 is virtualization enabled ;-) */
138 return 0;
139 }
140
141 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
142
143 /*
144 * This callback is executed during stop_machine(). All CPUs are therefore
145 * temporarily stopped. In order not to change guest behavior, we have to
146 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
147 * so a CPU won't be stopped while calculating with the epoch.
148 */
149 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
150 void *v)
151 {
152 struct kvm *kvm;
153 struct kvm_vcpu *vcpu;
154 int i;
155 unsigned long long *delta = v;
156
157 list_for_each_entry(kvm, &vm_list, vm_list) {
158 kvm->arch.epoch -= *delta;
159 kvm_for_each_vcpu(i, vcpu, kvm) {
160 vcpu->arch.sie_block->epoch -= *delta;
161 }
162 }
163 return NOTIFY_OK;
164 }
165
166 static struct notifier_block kvm_clock_notifier = {
167 .notifier_call = kvm_clock_sync,
168 };
169
170 int kvm_arch_hardware_setup(void)
171 {
172 gmap_notifier.notifier_call = kvm_gmap_notifier;
173 gmap_register_ipte_notifier(&gmap_notifier);
174 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
175 &kvm_clock_notifier);
176 return 0;
177 }
178
179 void kvm_arch_hardware_unsetup(void)
180 {
181 gmap_unregister_ipte_notifier(&gmap_notifier);
182 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
183 &kvm_clock_notifier);
184 }
185
186 int kvm_arch_init(void *opaque)
187 {
188 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
189 if (!kvm_s390_dbf)
190 return -ENOMEM;
191
192 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
193 debug_unregister(kvm_s390_dbf);
194 return -ENOMEM;
195 }
196
197 /* Register floating interrupt controller interface. */
198 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
199 }
200
201 void kvm_arch_exit(void)
202 {
203 debug_unregister(kvm_s390_dbf);
204 }
205
206 /* Section: device related */
207 long kvm_arch_dev_ioctl(struct file *filp,
208 unsigned int ioctl, unsigned long arg)
209 {
210 if (ioctl == KVM_S390_ENABLE_SIE)
211 return s390_enable_sie();
212 return -EINVAL;
213 }
214
215 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
216 {
217 int r;
218
219 switch (ext) {
220 case KVM_CAP_S390_PSW:
221 case KVM_CAP_S390_GMAP:
222 case KVM_CAP_SYNC_MMU:
223 #ifdef CONFIG_KVM_S390_UCONTROL
224 case KVM_CAP_S390_UCONTROL:
225 #endif
226 case KVM_CAP_ASYNC_PF:
227 case KVM_CAP_SYNC_REGS:
228 case KVM_CAP_ONE_REG:
229 case KVM_CAP_ENABLE_CAP:
230 case KVM_CAP_S390_CSS_SUPPORT:
231 case KVM_CAP_IOEVENTFD:
232 case KVM_CAP_DEVICE_CTRL:
233 case KVM_CAP_ENABLE_CAP_VM:
234 case KVM_CAP_S390_IRQCHIP:
235 case KVM_CAP_VM_ATTRIBUTES:
236 case KVM_CAP_MP_STATE:
237 case KVM_CAP_S390_INJECT_IRQ:
238 case KVM_CAP_S390_USER_SIGP:
239 case KVM_CAP_S390_USER_STSI:
240 case KVM_CAP_S390_SKEYS:
241 case KVM_CAP_S390_IRQ_STATE:
242 r = 1;
243 break;
244 case KVM_CAP_S390_MEM_OP:
245 r = MEM_OP_MAX_SIZE;
246 break;
247 case KVM_CAP_NR_VCPUS:
248 case KVM_CAP_MAX_VCPUS:
249 r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
250 : KVM_S390_BSCA_CPU_SLOTS;
251 break;
252 case KVM_CAP_NR_MEMSLOTS:
253 r = KVM_USER_MEM_SLOTS;
254 break;
255 case KVM_CAP_S390_COW:
256 r = MACHINE_HAS_ESOP;
257 break;
258 case KVM_CAP_S390_VECTOR_REGISTERS:
259 r = MACHINE_HAS_VX;
260 break;
261 case KVM_CAP_S390_RI:
262 r = test_facility(64);
263 break;
264 default:
265 r = 0;
266 }
267 return r;
268 }
269
270 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
271 struct kvm_memory_slot *memslot)
272 {
273 gfn_t cur_gfn, last_gfn;
274 unsigned long address;
275 struct gmap *gmap = kvm->arch.gmap;
276
277 /* Loop over all guest pages */
278 last_gfn = memslot->base_gfn + memslot->npages;
279 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
280 address = gfn_to_hva_memslot(memslot, cur_gfn);
281
282 if (gmap_test_and_clear_dirty(address, gmap))
283 mark_page_dirty(kvm, cur_gfn);
284 cond_resched();
285 }
286 }
287
288 /* Section: vm related */
289 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
290
291 /*
292 * Get (and clear) the dirty memory log for a memory slot.
293 */
294 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
295 struct kvm_dirty_log *log)
296 {
297 int r;
298 unsigned long n;
299 struct kvm_memslots *slots;
300 struct kvm_memory_slot *memslot;
301 int is_dirty = 0;
302
303 mutex_lock(&kvm->slots_lock);
304
305 r = -EINVAL;
306 if (log->slot >= KVM_USER_MEM_SLOTS)
307 goto out;
308
309 slots = kvm_memslots(kvm);
310 memslot = id_to_memslot(slots, log->slot);
311 r = -ENOENT;
312 if (!memslot->dirty_bitmap)
313 goto out;
314
315 kvm_s390_sync_dirty_log(kvm, memslot);
316 r = kvm_get_dirty_log(kvm, log, &is_dirty);
317 if (r)
318 goto out;
319
320 /* Clear the dirty log */
321 if (is_dirty) {
322 n = kvm_dirty_bitmap_bytes(memslot);
323 memset(memslot->dirty_bitmap, 0, n);
324 }
325 r = 0;
326 out:
327 mutex_unlock(&kvm->slots_lock);
328 return r;
329 }
330
331 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
332 {
333 int r;
334
335 if (cap->flags)
336 return -EINVAL;
337
338 switch (cap->cap) {
339 case KVM_CAP_S390_IRQCHIP:
340 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
341 kvm->arch.use_irqchip = 1;
342 r = 0;
343 break;
344 case KVM_CAP_S390_USER_SIGP:
345 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
346 kvm->arch.user_sigp = 1;
347 r = 0;
348 break;
349 case KVM_CAP_S390_VECTOR_REGISTERS:
350 mutex_lock(&kvm->lock);
351 if (atomic_read(&kvm->online_vcpus)) {
352 r = -EBUSY;
353 } else if (MACHINE_HAS_VX) {
354 set_kvm_facility(kvm->arch.model.fac->mask, 129);
355 set_kvm_facility(kvm->arch.model.fac->list, 129);
356 r = 0;
357 } else
358 r = -EINVAL;
359 mutex_unlock(&kvm->lock);
360 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
361 r ? "(not available)" : "(success)");
362 break;
363 case KVM_CAP_S390_RI:
364 r = -EINVAL;
365 mutex_lock(&kvm->lock);
366 if (atomic_read(&kvm->online_vcpus)) {
367 r = -EBUSY;
368 } else if (test_facility(64)) {
369 set_kvm_facility(kvm->arch.model.fac->mask, 64);
370 set_kvm_facility(kvm->arch.model.fac->list, 64);
371 r = 0;
372 }
373 mutex_unlock(&kvm->lock);
374 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
375 r ? "(not available)" : "(success)");
376 break;
377 case KVM_CAP_S390_USER_STSI:
378 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
379 kvm->arch.user_stsi = 1;
380 r = 0;
381 break;
382 default:
383 r = -EINVAL;
384 break;
385 }
386 return r;
387 }
388
389 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
390 {
391 int ret;
392
393 switch (attr->attr) {
394 case KVM_S390_VM_MEM_LIMIT_SIZE:
395 ret = 0;
396 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
397 kvm->arch.mem_limit);
398 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
399 ret = -EFAULT;
400 break;
401 default:
402 ret = -ENXIO;
403 break;
404 }
405 return ret;
406 }
407
408 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
409 {
410 int ret;
411 unsigned int idx;
412 switch (attr->attr) {
413 case KVM_S390_VM_MEM_ENABLE_CMMA:
414 /* enable CMMA only for z10 and later (EDAT_1) */
415 ret = -EINVAL;
416 if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
417 break;
418
419 ret = -EBUSY;
420 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
421 mutex_lock(&kvm->lock);
422 if (atomic_read(&kvm->online_vcpus) == 0) {
423 kvm->arch.use_cmma = 1;
424 ret = 0;
425 }
426 mutex_unlock(&kvm->lock);
427 break;
428 case KVM_S390_VM_MEM_CLR_CMMA:
429 ret = -EINVAL;
430 if (!kvm->arch.use_cmma)
431 break;
432
433 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
434 mutex_lock(&kvm->lock);
435 idx = srcu_read_lock(&kvm->srcu);
436 s390_reset_cmma(kvm->arch.gmap->mm);
437 srcu_read_unlock(&kvm->srcu, idx);
438 mutex_unlock(&kvm->lock);
439 ret = 0;
440 break;
441 case KVM_S390_VM_MEM_LIMIT_SIZE: {
442 unsigned long new_limit;
443
444 if (kvm_is_ucontrol(kvm))
445 return -EINVAL;
446
447 if (get_user(new_limit, (u64 __user *)attr->addr))
448 return -EFAULT;
449
450 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
451 new_limit > kvm->arch.mem_limit)
452 return -E2BIG;
453
454 if (!new_limit)
455 return -EINVAL;
456
457 /* gmap_alloc takes last usable address */
458 if (new_limit != KVM_S390_NO_MEM_LIMIT)
459 new_limit -= 1;
460
461 ret = -EBUSY;
462 mutex_lock(&kvm->lock);
463 if (atomic_read(&kvm->online_vcpus) == 0) {
464 /* gmap_alloc will round the limit up */
465 struct gmap *new = gmap_alloc(current->mm, new_limit);
466
467 if (!new) {
468 ret = -ENOMEM;
469 } else {
470 gmap_free(kvm->arch.gmap);
471 new->private = kvm;
472 kvm->arch.gmap = new;
473 ret = 0;
474 }
475 }
476 mutex_unlock(&kvm->lock);
477 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
478 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
479 (void *) kvm->arch.gmap->asce);
480 break;
481 }
482 default:
483 ret = -ENXIO;
484 break;
485 }
486 return ret;
487 }
488
489 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
490
491 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
492 {
493 struct kvm_vcpu *vcpu;
494 int i;
495
496 if (!test_kvm_facility(kvm, 76))
497 return -EINVAL;
498
499 mutex_lock(&kvm->lock);
500 switch (attr->attr) {
501 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
502 get_random_bytes(
503 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
504 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
505 kvm->arch.crypto.aes_kw = 1;
506 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
507 break;
508 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
509 get_random_bytes(
510 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
511 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
512 kvm->arch.crypto.dea_kw = 1;
513 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
514 break;
515 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
516 kvm->arch.crypto.aes_kw = 0;
517 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
518 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
519 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
520 break;
521 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
522 kvm->arch.crypto.dea_kw = 0;
523 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
524 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
525 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
526 break;
527 default:
528 mutex_unlock(&kvm->lock);
529 return -ENXIO;
530 }
531
532 kvm_for_each_vcpu(i, vcpu, kvm) {
533 kvm_s390_vcpu_crypto_setup(vcpu);
534 exit_sie(vcpu);
535 }
536 mutex_unlock(&kvm->lock);
537 return 0;
538 }
539
540 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
541 {
542 u8 gtod_high;
543
544 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
545 sizeof(gtod_high)))
546 return -EFAULT;
547
548 if (gtod_high != 0)
549 return -EINVAL;
550 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
551
552 return 0;
553 }
554
555 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
556 {
557 u64 gtod;
558
559 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
560 return -EFAULT;
561
562 kvm_s390_set_tod_clock(kvm, gtod);
563 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
564 return 0;
565 }
566
567 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
568 {
569 int ret;
570
571 if (attr->flags)
572 return -EINVAL;
573
574 switch (attr->attr) {
575 case KVM_S390_VM_TOD_HIGH:
576 ret = kvm_s390_set_tod_high(kvm, attr);
577 break;
578 case KVM_S390_VM_TOD_LOW:
579 ret = kvm_s390_set_tod_low(kvm, attr);
580 break;
581 default:
582 ret = -ENXIO;
583 break;
584 }
585 return ret;
586 }
587
588 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
589 {
590 u8 gtod_high = 0;
591
592 if (copy_to_user((void __user *)attr->addr, &gtod_high,
593 sizeof(gtod_high)))
594 return -EFAULT;
595 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
596
597 return 0;
598 }
599
600 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
601 {
602 u64 gtod;
603
604 gtod = kvm_s390_get_tod_clock_fast(kvm);
605 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
606 return -EFAULT;
607 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
608
609 return 0;
610 }
611
612 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
613 {
614 int ret;
615
616 if (attr->flags)
617 return -EINVAL;
618
619 switch (attr->attr) {
620 case KVM_S390_VM_TOD_HIGH:
621 ret = kvm_s390_get_tod_high(kvm, attr);
622 break;
623 case KVM_S390_VM_TOD_LOW:
624 ret = kvm_s390_get_tod_low(kvm, attr);
625 break;
626 default:
627 ret = -ENXIO;
628 break;
629 }
630 return ret;
631 }
632
633 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
634 {
635 struct kvm_s390_vm_cpu_processor *proc;
636 int ret = 0;
637
638 mutex_lock(&kvm->lock);
639 if (atomic_read(&kvm->online_vcpus)) {
640 ret = -EBUSY;
641 goto out;
642 }
643 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
644 if (!proc) {
645 ret = -ENOMEM;
646 goto out;
647 }
648 if (!copy_from_user(proc, (void __user *)attr->addr,
649 sizeof(*proc))) {
650 memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
651 sizeof(struct cpuid));
652 kvm->arch.model.ibc = proc->ibc;
653 memcpy(kvm->arch.model.fac->list, proc->fac_list,
654 S390_ARCH_FAC_LIST_SIZE_BYTE);
655 } else
656 ret = -EFAULT;
657 kfree(proc);
658 out:
659 mutex_unlock(&kvm->lock);
660 return ret;
661 }
662
663 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
664 {
665 int ret = -ENXIO;
666
667 switch (attr->attr) {
668 case KVM_S390_VM_CPU_PROCESSOR:
669 ret = kvm_s390_set_processor(kvm, attr);
670 break;
671 }
672 return ret;
673 }
674
675 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
676 {
677 struct kvm_s390_vm_cpu_processor *proc;
678 int ret = 0;
679
680 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
681 if (!proc) {
682 ret = -ENOMEM;
683 goto out;
684 }
685 memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
686 proc->ibc = kvm->arch.model.ibc;
687 memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
688 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
689 ret = -EFAULT;
690 kfree(proc);
691 out:
692 return ret;
693 }
694
695 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
696 {
697 struct kvm_s390_vm_cpu_machine *mach;
698 int ret = 0;
699
700 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
701 if (!mach) {
702 ret = -ENOMEM;
703 goto out;
704 }
705 get_cpu_id((struct cpuid *) &mach->cpuid);
706 mach->ibc = sclp.ibc;
707 memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
708 S390_ARCH_FAC_LIST_SIZE_BYTE);
709 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
710 S390_ARCH_FAC_LIST_SIZE_BYTE);
711 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
712 ret = -EFAULT;
713 kfree(mach);
714 out:
715 return ret;
716 }
717
718 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
719 {
720 int ret = -ENXIO;
721
722 switch (attr->attr) {
723 case KVM_S390_VM_CPU_PROCESSOR:
724 ret = kvm_s390_get_processor(kvm, attr);
725 break;
726 case KVM_S390_VM_CPU_MACHINE:
727 ret = kvm_s390_get_machine(kvm, attr);
728 break;
729 }
730 return ret;
731 }
732
733 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
734 {
735 int ret;
736
737 switch (attr->group) {
738 case KVM_S390_VM_MEM_CTRL:
739 ret = kvm_s390_set_mem_control(kvm, attr);
740 break;
741 case KVM_S390_VM_TOD:
742 ret = kvm_s390_set_tod(kvm, attr);
743 break;
744 case KVM_S390_VM_CPU_MODEL:
745 ret = kvm_s390_set_cpu_model(kvm, attr);
746 break;
747 case KVM_S390_VM_CRYPTO:
748 ret = kvm_s390_vm_set_crypto(kvm, attr);
749 break;
750 default:
751 ret = -ENXIO;
752 break;
753 }
754
755 return ret;
756 }
757
758 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
759 {
760 int ret;
761
762 switch (attr->group) {
763 case KVM_S390_VM_MEM_CTRL:
764 ret = kvm_s390_get_mem_control(kvm, attr);
765 break;
766 case KVM_S390_VM_TOD:
767 ret = kvm_s390_get_tod(kvm, attr);
768 break;
769 case KVM_S390_VM_CPU_MODEL:
770 ret = kvm_s390_get_cpu_model(kvm, attr);
771 break;
772 default:
773 ret = -ENXIO;
774 break;
775 }
776
777 return ret;
778 }
779
780 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
781 {
782 int ret;
783
784 switch (attr->group) {
785 case KVM_S390_VM_MEM_CTRL:
786 switch (attr->attr) {
787 case KVM_S390_VM_MEM_ENABLE_CMMA:
788 case KVM_S390_VM_MEM_CLR_CMMA:
789 case KVM_S390_VM_MEM_LIMIT_SIZE:
790 ret = 0;
791 break;
792 default:
793 ret = -ENXIO;
794 break;
795 }
796 break;
797 case KVM_S390_VM_TOD:
798 switch (attr->attr) {
799 case KVM_S390_VM_TOD_LOW:
800 case KVM_S390_VM_TOD_HIGH:
801 ret = 0;
802 break;
803 default:
804 ret = -ENXIO;
805 break;
806 }
807 break;
808 case KVM_S390_VM_CPU_MODEL:
809 switch (attr->attr) {
810 case KVM_S390_VM_CPU_PROCESSOR:
811 case KVM_S390_VM_CPU_MACHINE:
812 ret = 0;
813 break;
814 default:
815 ret = -ENXIO;
816 break;
817 }
818 break;
819 case KVM_S390_VM_CRYPTO:
820 switch (attr->attr) {
821 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
822 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
823 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
824 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
825 ret = 0;
826 break;
827 default:
828 ret = -ENXIO;
829 break;
830 }
831 break;
832 default:
833 ret = -ENXIO;
834 break;
835 }
836
837 return ret;
838 }
839
840 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
841 {
842 uint8_t *keys;
843 uint64_t hva;
844 unsigned long curkey;
845 int i, r = 0;
846
847 if (args->flags != 0)
848 return -EINVAL;
849
850 /* Is this guest using storage keys? */
851 if (!mm_use_skey(current->mm))
852 return KVM_S390_GET_SKEYS_NONE;
853
854 /* Enforce sane limit on memory allocation */
855 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
856 return -EINVAL;
857
858 keys = kmalloc_array(args->count, sizeof(uint8_t),
859 GFP_KERNEL | __GFP_NOWARN);
860 if (!keys)
861 keys = vmalloc(sizeof(uint8_t) * args->count);
862 if (!keys)
863 return -ENOMEM;
864
865 for (i = 0; i < args->count; i++) {
866 hva = gfn_to_hva(kvm, args->start_gfn + i);
867 if (kvm_is_error_hva(hva)) {
868 r = -EFAULT;
869 goto out;
870 }
871
872 curkey = get_guest_storage_key(current->mm, hva);
873 if (IS_ERR_VALUE(curkey)) {
874 r = curkey;
875 goto out;
876 }
877 keys[i] = curkey;
878 }
879
880 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
881 sizeof(uint8_t) * args->count);
882 if (r)
883 r = -EFAULT;
884 out:
885 kvfree(keys);
886 return r;
887 }
888
889 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
890 {
891 uint8_t *keys;
892 uint64_t hva;
893 int i, r = 0;
894
895 if (args->flags != 0)
896 return -EINVAL;
897
898 /* Enforce sane limit on memory allocation */
899 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
900 return -EINVAL;
901
902 keys = kmalloc_array(args->count, sizeof(uint8_t),
903 GFP_KERNEL | __GFP_NOWARN);
904 if (!keys)
905 keys = vmalloc(sizeof(uint8_t) * args->count);
906 if (!keys)
907 return -ENOMEM;
908
909 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
910 sizeof(uint8_t) * args->count);
911 if (r) {
912 r = -EFAULT;
913 goto out;
914 }
915
916 /* Enable storage key handling for the guest */
917 r = s390_enable_skey();
918 if (r)
919 goto out;
920
921 for (i = 0; i < args->count; i++) {
922 hva = gfn_to_hva(kvm, args->start_gfn + i);
923 if (kvm_is_error_hva(hva)) {
924 r = -EFAULT;
925 goto out;
926 }
927
928 /* Lowest order bit is reserved */
929 if (keys[i] & 0x01) {
930 r = -EINVAL;
931 goto out;
932 }
933
934 r = set_guest_storage_key(current->mm, hva,
935 (unsigned long)keys[i], 0);
936 if (r)
937 goto out;
938 }
939 out:
940 kvfree(keys);
941 return r;
942 }
943
944 long kvm_arch_vm_ioctl(struct file *filp,
945 unsigned int ioctl, unsigned long arg)
946 {
947 struct kvm *kvm = filp->private_data;
948 void __user *argp = (void __user *)arg;
949 struct kvm_device_attr attr;
950 int r;
951
952 switch (ioctl) {
953 case KVM_S390_INTERRUPT: {
954 struct kvm_s390_interrupt s390int;
955
956 r = -EFAULT;
957 if (copy_from_user(&s390int, argp, sizeof(s390int)))
958 break;
959 r = kvm_s390_inject_vm(kvm, &s390int);
960 break;
961 }
962 case KVM_ENABLE_CAP: {
963 struct kvm_enable_cap cap;
964 r = -EFAULT;
965 if (copy_from_user(&cap, argp, sizeof(cap)))
966 break;
967 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
968 break;
969 }
970 case KVM_CREATE_IRQCHIP: {
971 struct kvm_irq_routing_entry routing;
972
973 r = -EINVAL;
974 if (kvm->arch.use_irqchip) {
975 /* Set up dummy routing. */
976 memset(&routing, 0, sizeof(routing));
977 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
978 }
979 break;
980 }
981 case KVM_SET_DEVICE_ATTR: {
982 r = -EFAULT;
983 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
984 break;
985 r = kvm_s390_vm_set_attr(kvm, &attr);
986 break;
987 }
988 case KVM_GET_DEVICE_ATTR: {
989 r = -EFAULT;
990 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
991 break;
992 r = kvm_s390_vm_get_attr(kvm, &attr);
993 break;
994 }
995 case KVM_HAS_DEVICE_ATTR: {
996 r = -EFAULT;
997 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
998 break;
999 r = kvm_s390_vm_has_attr(kvm, &attr);
1000 break;
1001 }
1002 case KVM_S390_GET_SKEYS: {
1003 struct kvm_s390_skeys args;
1004
1005 r = -EFAULT;
1006 if (copy_from_user(&args, argp,
1007 sizeof(struct kvm_s390_skeys)))
1008 break;
1009 r = kvm_s390_get_skeys(kvm, &args);
1010 break;
1011 }
1012 case KVM_S390_SET_SKEYS: {
1013 struct kvm_s390_skeys args;
1014
1015 r = -EFAULT;
1016 if (copy_from_user(&args, argp,
1017 sizeof(struct kvm_s390_skeys)))
1018 break;
1019 r = kvm_s390_set_skeys(kvm, &args);
1020 break;
1021 }
1022 default:
1023 r = -ENOTTY;
1024 }
1025
1026 return r;
1027 }
1028
1029 static int kvm_s390_query_ap_config(u8 *config)
1030 {
1031 u32 fcn_code = 0x04000000UL;
1032 u32 cc = 0;
1033
1034 memset(config, 0, 128);
1035 asm volatile(
1036 "lgr 0,%1\n"
1037 "lgr 2,%2\n"
1038 ".long 0xb2af0000\n" /* PQAP(QCI) */
1039 "0: ipm %0\n"
1040 "srl %0,28\n"
1041 "1:\n"
1042 EX_TABLE(0b, 1b)
1043 : "+r" (cc)
1044 : "r" (fcn_code), "r" (config)
1045 : "cc", "0", "2", "memory"
1046 );
1047
1048 return cc;
1049 }
1050
1051 static int kvm_s390_apxa_installed(void)
1052 {
1053 u8 config[128];
1054 int cc;
1055
1056 if (test_facility(12)) {
1057 cc = kvm_s390_query_ap_config(config);
1058
1059 if (cc)
1060 pr_err("PQAP(QCI) failed with cc=%d", cc);
1061 else
1062 return config[0] & 0x40;
1063 }
1064
1065 return 0;
1066 }
1067
1068 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1069 {
1070 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1071
1072 if (kvm_s390_apxa_installed())
1073 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1074 else
1075 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1076 }
1077
1078 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1079 {
1080 get_cpu_id(cpu_id);
1081 cpu_id->version = 0xff;
1082 }
1083
1084 static int kvm_s390_crypto_init(struct kvm *kvm)
1085 {
1086 if (!test_kvm_facility(kvm, 76))
1087 return 0;
1088
1089 kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
1090 GFP_KERNEL | GFP_DMA);
1091 if (!kvm->arch.crypto.crycb)
1092 return -ENOMEM;
1093
1094 kvm_s390_set_crycb_format(kvm);
1095
1096 /* Enable AES/DEA protected key functions by default */
1097 kvm->arch.crypto.aes_kw = 1;
1098 kvm->arch.crypto.dea_kw = 1;
1099 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1100 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1101 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1102 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1103
1104 return 0;
1105 }
1106
1107 static void sca_dispose(struct kvm *kvm)
1108 {
1109 if (kvm->arch.use_esca)
1110 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1111 else
1112 free_page((unsigned long)(kvm->arch.sca));
1113 kvm->arch.sca = NULL;
1114 }
1115
1116 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1117 {
1118 int i, rc;
1119 char debug_name[16];
1120 static unsigned long sca_offset;
1121
1122 rc = -EINVAL;
1123 #ifdef CONFIG_KVM_S390_UCONTROL
1124 if (type & ~KVM_VM_S390_UCONTROL)
1125 goto out_err;
1126 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1127 goto out_err;
1128 #else
1129 if (type)
1130 goto out_err;
1131 #endif
1132
1133 rc = s390_enable_sie();
1134 if (rc)
1135 goto out_err;
1136
1137 rc = -ENOMEM;
1138
1139 kvm->arch.use_esca = 0; /* start with basic SCA */
1140 rwlock_init(&kvm->arch.sca_lock);
1141 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
1142 if (!kvm->arch.sca)
1143 goto out_err;
1144 spin_lock(&kvm_lock);
1145 sca_offset += 16;
1146 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1147 sca_offset = 0;
1148 kvm->arch.sca = (struct bsca_block *)
1149 ((char *) kvm->arch.sca + sca_offset);
1150 spin_unlock(&kvm_lock);
1151
1152 sprintf(debug_name, "kvm-%u", current->pid);
1153
1154 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1155 if (!kvm->arch.dbf)
1156 goto out_err;
1157
1158 /*
1159 * The architectural maximum amount of facilities is 16 kbit. To store
1160 * this amount, 2 kbyte of memory is required. Thus we need a full
1161 * page to hold the guest facility list (arch.model.fac->list) and the
1162 * facility mask (arch.model.fac->mask). Its address size has to be
1163 * 31 bits and word aligned.
1164 */
1165 kvm->arch.model.fac =
1166 (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1167 if (!kvm->arch.model.fac)
1168 goto out_err;
1169
1170 /* Populate the facility mask initially. */
1171 memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1172 S390_ARCH_FAC_LIST_SIZE_BYTE);
1173 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1174 if (i < kvm_s390_fac_list_mask_size())
1175 kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1176 else
1177 kvm->arch.model.fac->mask[i] = 0UL;
1178 }
1179
1180 /* Populate the facility list initially. */
1181 memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1182 S390_ARCH_FAC_LIST_SIZE_BYTE);
1183
1184 kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1185 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1186
1187 if (kvm_s390_crypto_init(kvm) < 0)
1188 goto out_err;
1189
1190 spin_lock_init(&kvm->arch.float_int.lock);
1191 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1192 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1193 init_waitqueue_head(&kvm->arch.ipte_wq);
1194 mutex_init(&kvm->arch.ipte_mutex);
1195
1196 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1197 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1198
1199 if (type & KVM_VM_S390_UCONTROL) {
1200 kvm->arch.gmap = NULL;
1201 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1202 } else {
1203 if (sclp.hamax == U64_MAX)
1204 kvm->arch.mem_limit = TASK_MAX_SIZE;
1205 else
1206 kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1207 sclp.hamax + 1);
1208 kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
1209 if (!kvm->arch.gmap)
1210 goto out_err;
1211 kvm->arch.gmap->private = kvm;
1212 kvm->arch.gmap->pfault_enabled = 0;
1213 }
1214
1215 kvm->arch.css_support = 0;
1216 kvm->arch.use_irqchip = 0;
1217 kvm->arch.epoch = 0;
1218
1219 spin_lock_init(&kvm->arch.start_stop_lock);
1220 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1221
1222 return 0;
1223 out_err:
1224 kfree(kvm->arch.crypto.crycb);
1225 free_page((unsigned long)kvm->arch.model.fac);
1226 debug_unregister(kvm->arch.dbf);
1227 sca_dispose(kvm);
1228 KVM_EVENT(3, "creation of vm failed: %d", rc);
1229 return rc;
1230 }
1231
1232 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1233 {
1234 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1235 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1236 kvm_s390_clear_local_irqs(vcpu);
1237 kvm_clear_async_pf_completion_queue(vcpu);
1238 if (!kvm_is_ucontrol(vcpu->kvm))
1239 sca_del_vcpu(vcpu);
1240
1241 if (kvm_is_ucontrol(vcpu->kvm))
1242 gmap_free(vcpu->arch.gmap);
1243
1244 if (vcpu->kvm->arch.use_cmma)
1245 kvm_s390_vcpu_unsetup_cmma(vcpu);
1246 free_page((unsigned long)(vcpu->arch.sie_block));
1247
1248 kvm_vcpu_uninit(vcpu);
1249 kmem_cache_free(kvm_vcpu_cache, vcpu);
1250 }
1251
1252 static void kvm_free_vcpus(struct kvm *kvm)
1253 {
1254 unsigned int i;
1255 struct kvm_vcpu *vcpu;
1256
1257 kvm_for_each_vcpu(i, vcpu, kvm)
1258 kvm_arch_vcpu_destroy(vcpu);
1259
1260 mutex_lock(&kvm->lock);
1261 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1262 kvm->vcpus[i] = NULL;
1263
1264 atomic_set(&kvm->online_vcpus, 0);
1265 mutex_unlock(&kvm->lock);
1266 }
1267
1268 void kvm_arch_destroy_vm(struct kvm *kvm)
1269 {
1270 kvm_free_vcpus(kvm);
1271 free_page((unsigned long)kvm->arch.model.fac);
1272 sca_dispose(kvm);
1273 debug_unregister(kvm->arch.dbf);
1274 kfree(kvm->arch.crypto.crycb);
1275 if (!kvm_is_ucontrol(kvm))
1276 gmap_free(kvm->arch.gmap);
1277 kvm_s390_destroy_adapters(kvm);
1278 kvm_s390_clear_float_irqs(kvm);
1279 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1280 }
1281
1282 /* Section: vcpu related */
1283 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1284 {
1285 vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1286 if (!vcpu->arch.gmap)
1287 return -ENOMEM;
1288 vcpu->arch.gmap->private = vcpu->kvm;
1289
1290 return 0;
1291 }
1292
1293 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1294 {
1295 read_lock(&vcpu->kvm->arch.sca_lock);
1296 if (vcpu->kvm->arch.use_esca) {
1297 struct esca_block *sca = vcpu->kvm->arch.sca;
1298
1299 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1300 sca->cpu[vcpu->vcpu_id].sda = 0;
1301 } else {
1302 struct bsca_block *sca = vcpu->kvm->arch.sca;
1303
1304 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1305 sca->cpu[vcpu->vcpu_id].sda = 0;
1306 }
1307 read_unlock(&vcpu->kvm->arch.sca_lock);
1308 }
1309
1310 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1311 {
1312 read_lock(&vcpu->kvm->arch.sca_lock);
1313 if (vcpu->kvm->arch.use_esca) {
1314 struct esca_block *sca = vcpu->kvm->arch.sca;
1315
1316 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1317 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1318 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1319 vcpu->arch.sie_block->ecb2 |= 0x04U;
1320 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1321 } else {
1322 struct bsca_block *sca = vcpu->kvm->arch.sca;
1323
1324 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1325 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1326 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1327 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1328 }
1329 read_unlock(&vcpu->kvm->arch.sca_lock);
1330 }
1331
1332 /* Basic SCA to Extended SCA data copy routines */
1333 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1334 {
1335 d->sda = s->sda;
1336 d->sigp_ctrl.c = s->sigp_ctrl.c;
1337 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1338 }
1339
1340 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1341 {
1342 int i;
1343
1344 d->ipte_control = s->ipte_control;
1345 d->mcn[0] = s->mcn;
1346 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1347 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1348 }
1349
1350 static int sca_switch_to_extended(struct kvm *kvm)
1351 {
1352 struct bsca_block *old_sca = kvm->arch.sca;
1353 struct esca_block *new_sca;
1354 struct kvm_vcpu *vcpu;
1355 unsigned int vcpu_idx;
1356 u32 scaol, scaoh;
1357
1358 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1359 if (!new_sca)
1360 return -ENOMEM;
1361
1362 scaoh = (u32)((u64)(new_sca) >> 32);
1363 scaol = (u32)(u64)(new_sca) & ~0x3fU;
1364
1365 kvm_s390_vcpu_block_all(kvm);
1366 write_lock(&kvm->arch.sca_lock);
1367
1368 sca_copy_b_to_e(new_sca, old_sca);
1369
1370 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1371 vcpu->arch.sie_block->scaoh = scaoh;
1372 vcpu->arch.sie_block->scaol = scaol;
1373 vcpu->arch.sie_block->ecb2 |= 0x04U;
1374 }
1375 kvm->arch.sca = new_sca;
1376 kvm->arch.use_esca = 1;
1377
1378 write_unlock(&kvm->arch.sca_lock);
1379 kvm_s390_vcpu_unblock_all(kvm);
1380
1381 free_page((unsigned long)old_sca);
1382
1383 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1384 old_sca, kvm->arch.sca);
1385 return 0;
1386 }
1387
1388 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1389 {
1390 int rc;
1391
1392 if (id < KVM_S390_BSCA_CPU_SLOTS)
1393 return true;
1394 if (!sclp.has_esca)
1395 return false;
1396
1397 mutex_lock(&kvm->lock);
1398 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1399 mutex_unlock(&kvm->lock);
1400
1401 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1402 }
1403
1404 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1405 {
1406 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1407 kvm_clear_async_pf_completion_queue(vcpu);
1408 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1409 KVM_SYNC_GPRS |
1410 KVM_SYNC_ACRS |
1411 KVM_SYNC_CRS |
1412 KVM_SYNC_ARCH0 |
1413 KVM_SYNC_PFAULT;
1414 if (test_kvm_facility(vcpu->kvm, 64))
1415 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1416 /* fprs can be synchronized via vrs, even if the guest has no vx. With
1417 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1418 */
1419 if (MACHINE_HAS_VX)
1420 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1421 else
1422 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1423
1424 if (kvm_is_ucontrol(vcpu->kvm))
1425 return __kvm_ucontrol_vcpu_init(vcpu);
1426
1427 return 0;
1428 }
1429
1430 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1431 {
1432 /* Save host register state */
1433 save_fpu_regs();
1434 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1435 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1436
1437 if (MACHINE_HAS_VX)
1438 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1439 else
1440 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1441 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1442 if (test_fp_ctl(current->thread.fpu.fpc))
1443 /* User space provided an invalid FPC, let's clear it */
1444 current->thread.fpu.fpc = 0;
1445
1446 save_access_regs(vcpu->arch.host_acrs);
1447 restore_access_regs(vcpu->run->s.regs.acrs);
1448 gmap_enable(vcpu->arch.gmap);
1449 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1450 }
1451
1452 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1453 {
1454 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1455 gmap_disable(vcpu->arch.gmap);
1456
1457 /* Save guest register state */
1458 save_fpu_regs();
1459 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1460
1461 /* Restore host register state */
1462 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1463 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1464
1465 save_access_regs(vcpu->run->s.regs.acrs);
1466 restore_access_regs(vcpu->arch.host_acrs);
1467 }
1468
1469 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1470 {
1471 /* this equals initial cpu reset in pop, but we don't switch to ESA */
1472 vcpu->arch.sie_block->gpsw.mask = 0UL;
1473 vcpu->arch.sie_block->gpsw.addr = 0UL;
1474 kvm_s390_set_prefix(vcpu, 0);
1475 vcpu->arch.sie_block->cputm = 0UL;
1476 vcpu->arch.sie_block->ckc = 0UL;
1477 vcpu->arch.sie_block->todpr = 0;
1478 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1479 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
1480 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1481 /* make sure the new fpc will be lazily loaded */
1482 save_fpu_regs();
1483 current->thread.fpu.fpc = 0;
1484 vcpu->arch.sie_block->gbea = 1;
1485 vcpu->arch.sie_block->pp = 0;
1486 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1487 kvm_clear_async_pf_completion_queue(vcpu);
1488 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1489 kvm_s390_vcpu_stop(vcpu);
1490 kvm_s390_clear_local_irqs(vcpu);
1491 }
1492
1493 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1494 {
1495 mutex_lock(&vcpu->kvm->lock);
1496 preempt_disable();
1497 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1498 preempt_enable();
1499 mutex_unlock(&vcpu->kvm->lock);
1500 if (!kvm_is_ucontrol(vcpu->kvm)) {
1501 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1502 sca_add_vcpu(vcpu);
1503 }
1504
1505 }
1506
1507 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1508 {
1509 if (!test_kvm_facility(vcpu->kvm, 76))
1510 return;
1511
1512 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1513
1514 if (vcpu->kvm->arch.crypto.aes_kw)
1515 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1516 if (vcpu->kvm->arch.crypto.dea_kw)
1517 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1518
1519 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1520 }
1521
1522 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1523 {
1524 free_page(vcpu->arch.sie_block->cbrlo);
1525 vcpu->arch.sie_block->cbrlo = 0;
1526 }
1527
1528 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1529 {
1530 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1531 if (!vcpu->arch.sie_block->cbrlo)
1532 return -ENOMEM;
1533
1534 vcpu->arch.sie_block->ecb2 |= 0x80;
1535 vcpu->arch.sie_block->ecb2 &= ~0x08;
1536 return 0;
1537 }
1538
1539 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1540 {
1541 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1542
1543 vcpu->arch.cpu_id = model->cpu_id;
1544 vcpu->arch.sie_block->ibc = model->ibc;
1545 vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1546 }
1547
1548 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1549 {
1550 int rc = 0;
1551
1552 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1553 CPUSTAT_SM |
1554 CPUSTAT_STOPPED);
1555
1556 if (test_kvm_facility(vcpu->kvm, 78))
1557 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1558 else if (test_kvm_facility(vcpu->kvm, 8))
1559 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1560
1561 kvm_s390_vcpu_setup_model(vcpu);
1562
1563 vcpu->arch.sie_block->ecb = 6;
1564 if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1565 vcpu->arch.sie_block->ecb |= 0x10;
1566
1567 vcpu->arch.sie_block->ecb2 = 8;
1568 vcpu->arch.sie_block->eca = 0xC1002000U;
1569 if (sclp.has_siif)
1570 vcpu->arch.sie_block->eca |= 1;
1571 if (sclp.has_sigpif)
1572 vcpu->arch.sie_block->eca |= 0x10000000U;
1573 if (test_kvm_facility(vcpu->kvm, 64))
1574 vcpu->arch.sie_block->ecb3 |= 0x01;
1575 if (test_kvm_facility(vcpu->kvm, 129)) {
1576 vcpu->arch.sie_block->eca |= 0x00020000;
1577 vcpu->arch.sie_block->ecd |= 0x20000000;
1578 }
1579 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1580 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1581
1582 if (vcpu->kvm->arch.use_cmma) {
1583 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1584 if (rc)
1585 return rc;
1586 }
1587 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1588 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1589
1590 kvm_s390_vcpu_crypto_setup(vcpu);
1591
1592 return rc;
1593 }
1594
1595 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1596 unsigned int id)
1597 {
1598 struct kvm_vcpu *vcpu;
1599 struct sie_page *sie_page;
1600 int rc = -EINVAL;
1601
1602 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1603 goto out;
1604
1605 rc = -ENOMEM;
1606
1607 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1608 if (!vcpu)
1609 goto out;
1610
1611 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1612 if (!sie_page)
1613 goto out_free_cpu;
1614
1615 vcpu->arch.sie_block = &sie_page->sie_block;
1616 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1617
1618 vcpu->arch.sie_block->icpua = id;
1619 spin_lock_init(&vcpu->arch.local_int.lock);
1620 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1621 vcpu->arch.local_int.wq = &vcpu->wq;
1622 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1623
1624 rc = kvm_vcpu_init(vcpu, kvm, id);
1625 if (rc)
1626 goto out_free_sie_block;
1627 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
1628 vcpu->arch.sie_block);
1629 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1630
1631 return vcpu;
1632 out_free_sie_block:
1633 free_page((unsigned long)(vcpu->arch.sie_block));
1634 out_free_cpu:
1635 kmem_cache_free(kvm_vcpu_cache, vcpu);
1636 out:
1637 return ERR_PTR(rc);
1638 }
1639
1640 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1641 {
1642 return kvm_s390_vcpu_has_irq(vcpu, 0);
1643 }
1644
1645 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1646 {
1647 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1648 exit_sie(vcpu);
1649 }
1650
1651 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1652 {
1653 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1654 }
1655
1656 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1657 {
1658 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1659 exit_sie(vcpu);
1660 }
1661
1662 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1663 {
1664 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1665 }
1666
1667 /*
1668 * Kick a guest cpu out of SIE and wait until SIE is not running.
1669 * If the CPU is not running (e.g. waiting as idle) the function will
1670 * return immediately. */
1671 void exit_sie(struct kvm_vcpu *vcpu)
1672 {
1673 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1674 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1675 cpu_relax();
1676 }
1677
1678 /* Kick a guest cpu out of SIE to process a request synchronously */
1679 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1680 {
1681 kvm_make_request(req, vcpu);
1682 kvm_s390_vcpu_request(vcpu);
1683 }
1684
1685 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1686 {
1687 int i;
1688 struct kvm *kvm = gmap->private;
1689 struct kvm_vcpu *vcpu;
1690
1691 kvm_for_each_vcpu(i, vcpu, kvm) {
1692 /* match against both prefix pages */
1693 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1694 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1695 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1696 }
1697 }
1698 }
1699
1700 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1701 {
1702 /* kvm common code refers to this, but never calls it */
1703 BUG();
1704 return 0;
1705 }
1706
1707 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1708 struct kvm_one_reg *reg)
1709 {
1710 int r = -EINVAL;
1711
1712 switch (reg->id) {
1713 case KVM_REG_S390_TODPR:
1714 r = put_user(vcpu->arch.sie_block->todpr,
1715 (u32 __user *)reg->addr);
1716 break;
1717 case KVM_REG_S390_EPOCHDIFF:
1718 r = put_user(vcpu->arch.sie_block->epoch,
1719 (u64 __user *)reg->addr);
1720 break;
1721 case KVM_REG_S390_CPU_TIMER:
1722 r = put_user(vcpu->arch.sie_block->cputm,
1723 (u64 __user *)reg->addr);
1724 break;
1725 case KVM_REG_S390_CLOCK_COMP:
1726 r = put_user(vcpu->arch.sie_block->ckc,
1727 (u64 __user *)reg->addr);
1728 break;
1729 case KVM_REG_S390_PFTOKEN:
1730 r = put_user(vcpu->arch.pfault_token,
1731 (u64 __user *)reg->addr);
1732 break;
1733 case KVM_REG_S390_PFCOMPARE:
1734 r = put_user(vcpu->arch.pfault_compare,
1735 (u64 __user *)reg->addr);
1736 break;
1737 case KVM_REG_S390_PFSELECT:
1738 r = put_user(vcpu->arch.pfault_select,
1739 (u64 __user *)reg->addr);
1740 break;
1741 case KVM_REG_S390_PP:
1742 r = put_user(vcpu->arch.sie_block->pp,
1743 (u64 __user *)reg->addr);
1744 break;
1745 case KVM_REG_S390_GBEA:
1746 r = put_user(vcpu->arch.sie_block->gbea,
1747 (u64 __user *)reg->addr);
1748 break;
1749 default:
1750 break;
1751 }
1752
1753 return r;
1754 }
1755
1756 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1757 struct kvm_one_reg *reg)
1758 {
1759 int r = -EINVAL;
1760
1761 switch (reg->id) {
1762 case KVM_REG_S390_TODPR:
1763 r = get_user(vcpu->arch.sie_block->todpr,
1764 (u32 __user *)reg->addr);
1765 break;
1766 case KVM_REG_S390_EPOCHDIFF:
1767 r = get_user(vcpu->arch.sie_block->epoch,
1768 (u64 __user *)reg->addr);
1769 break;
1770 case KVM_REG_S390_CPU_TIMER:
1771 r = get_user(vcpu->arch.sie_block->cputm,
1772 (u64 __user *)reg->addr);
1773 break;
1774 case KVM_REG_S390_CLOCK_COMP:
1775 r = get_user(vcpu->arch.sie_block->ckc,
1776 (u64 __user *)reg->addr);
1777 break;
1778 case KVM_REG_S390_PFTOKEN:
1779 r = get_user(vcpu->arch.pfault_token,
1780 (u64 __user *)reg->addr);
1781 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1782 kvm_clear_async_pf_completion_queue(vcpu);
1783 break;
1784 case KVM_REG_S390_PFCOMPARE:
1785 r = get_user(vcpu->arch.pfault_compare,
1786 (u64 __user *)reg->addr);
1787 break;
1788 case KVM_REG_S390_PFSELECT:
1789 r = get_user(vcpu->arch.pfault_select,
1790 (u64 __user *)reg->addr);
1791 break;
1792 case KVM_REG_S390_PP:
1793 r = get_user(vcpu->arch.sie_block->pp,
1794 (u64 __user *)reg->addr);
1795 break;
1796 case KVM_REG_S390_GBEA:
1797 r = get_user(vcpu->arch.sie_block->gbea,
1798 (u64 __user *)reg->addr);
1799 break;
1800 default:
1801 break;
1802 }
1803
1804 return r;
1805 }
1806
1807 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1808 {
1809 kvm_s390_vcpu_initial_reset(vcpu);
1810 return 0;
1811 }
1812
1813 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1814 {
1815 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1816 return 0;
1817 }
1818
1819 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1820 {
1821 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1822 return 0;
1823 }
1824
1825 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1826 struct kvm_sregs *sregs)
1827 {
1828 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1829 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1830 restore_access_regs(vcpu->run->s.regs.acrs);
1831 return 0;
1832 }
1833
1834 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1835 struct kvm_sregs *sregs)
1836 {
1837 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1838 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1839 return 0;
1840 }
1841
1842 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1843 {
1844 /* make sure the new values will be lazily loaded */
1845 save_fpu_regs();
1846 if (test_fp_ctl(fpu->fpc))
1847 return -EINVAL;
1848 current->thread.fpu.fpc = fpu->fpc;
1849 if (MACHINE_HAS_VX)
1850 convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
1851 else
1852 memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
1853 return 0;
1854 }
1855
1856 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1857 {
1858 /* make sure we have the latest values */
1859 save_fpu_regs();
1860 if (MACHINE_HAS_VX)
1861 convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
1862 else
1863 memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
1864 fpu->fpc = current->thread.fpu.fpc;
1865 return 0;
1866 }
1867
1868 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1869 {
1870 int rc = 0;
1871
1872 if (!is_vcpu_stopped(vcpu))
1873 rc = -EBUSY;
1874 else {
1875 vcpu->run->psw_mask = psw.mask;
1876 vcpu->run->psw_addr = psw.addr;
1877 }
1878 return rc;
1879 }
1880
1881 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1882 struct kvm_translation *tr)
1883 {
1884 return -EINVAL; /* not implemented yet */
1885 }
1886
1887 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1888 KVM_GUESTDBG_USE_HW_BP | \
1889 KVM_GUESTDBG_ENABLE)
1890
1891 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1892 struct kvm_guest_debug *dbg)
1893 {
1894 int rc = 0;
1895
1896 vcpu->guest_debug = 0;
1897 kvm_s390_clear_bp_data(vcpu);
1898
1899 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1900 return -EINVAL;
1901
1902 if (dbg->control & KVM_GUESTDBG_ENABLE) {
1903 vcpu->guest_debug = dbg->control;
1904 /* enforce guest PER */
1905 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1906
1907 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1908 rc = kvm_s390_import_bp_data(vcpu, dbg);
1909 } else {
1910 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1911 vcpu->arch.guestdbg.last_bp = 0;
1912 }
1913
1914 if (rc) {
1915 vcpu->guest_debug = 0;
1916 kvm_s390_clear_bp_data(vcpu);
1917 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1918 }
1919
1920 return rc;
1921 }
1922
1923 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1924 struct kvm_mp_state *mp_state)
1925 {
1926 /* CHECK_STOP and LOAD are not supported yet */
1927 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1928 KVM_MP_STATE_OPERATING;
1929 }
1930
1931 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1932 struct kvm_mp_state *mp_state)
1933 {
1934 int rc = 0;
1935
1936 /* user space knows about this interface - let it control the state */
1937 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1938
1939 switch (mp_state->mp_state) {
1940 case KVM_MP_STATE_STOPPED:
1941 kvm_s390_vcpu_stop(vcpu);
1942 break;
1943 case KVM_MP_STATE_OPERATING:
1944 kvm_s390_vcpu_start(vcpu);
1945 break;
1946 case KVM_MP_STATE_LOAD:
1947 case KVM_MP_STATE_CHECK_STOP:
1948 /* fall through - CHECK_STOP and LOAD are not supported yet */
1949 default:
1950 rc = -ENXIO;
1951 }
1952
1953 return rc;
1954 }
1955
1956 static bool ibs_enabled(struct kvm_vcpu *vcpu)
1957 {
1958 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1959 }
1960
1961 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1962 {
1963 retry:
1964 kvm_s390_vcpu_request_handled(vcpu);
1965 if (!vcpu->requests)
1966 return 0;
1967 /*
1968 * We use MMU_RELOAD just to re-arm the ipte notifier for the
1969 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1970 * This ensures that the ipte instruction for this request has
1971 * already finished. We might race against a second unmapper that
1972 * wants to set the blocking bit. Lets just retry the request loop.
1973 */
1974 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1975 int rc;
1976 rc = gmap_ipte_notify(vcpu->arch.gmap,
1977 kvm_s390_get_prefix(vcpu),
1978 PAGE_SIZE * 2);
1979 if (rc)
1980 return rc;
1981 goto retry;
1982 }
1983
1984 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1985 vcpu->arch.sie_block->ihcpu = 0xffff;
1986 goto retry;
1987 }
1988
1989 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1990 if (!ibs_enabled(vcpu)) {
1991 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1992 atomic_or(CPUSTAT_IBS,
1993 &vcpu->arch.sie_block->cpuflags);
1994 }
1995 goto retry;
1996 }
1997
1998 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
1999 if (ibs_enabled(vcpu)) {
2000 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2001 atomic_andnot(CPUSTAT_IBS,
2002 &vcpu->arch.sie_block->cpuflags);
2003 }
2004 goto retry;
2005 }
2006
2007 /* nothing to do, just clear the request */
2008 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2009
2010 return 0;
2011 }
2012
2013 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2014 {
2015 struct kvm_vcpu *vcpu;
2016 int i;
2017
2018 mutex_lock(&kvm->lock);
2019 preempt_disable();
2020 kvm->arch.epoch = tod - get_tod_clock();
2021 kvm_s390_vcpu_block_all(kvm);
2022 kvm_for_each_vcpu(i, vcpu, kvm)
2023 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2024 kvm_s390_vcpu_unblock_all(kvm);
2025 preempt_enable();
2026 mutex_unlock(&kvm->lock);
2027 }
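/*
 * The epoch written above is the offset that SIE adds to the host TOD
 * clock to form the guest TOD clock, so after this call every VCPU
 * observes:
 *
 *	guest_tod = get_tod_clock() + epoch
 *	          = get_tod_clock() + (tod - get_tod_clock())
 *	          = tod
 */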
2028
2029 /**
2030 * kvm_arch_fault_in_page - fault-in guest page if necessary
2031 * @vcpu: The corresponding virtual cpu
2032 * @gpa: Guest physical address
2033 * @writable: Whether the page should be writable or not
2034 *
2035 * Make sure that a guest page has been faulted-in on the host.
2036 *
2037 * Return: Zero on success, negative error code otherwise.
2038 */
2039 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2040 {
2041 return gmap_fault(vcpu->arch.gmap, gpa,
2042 writable ? FAULT_FLAG_WRITE : 0);
2043 }
2044
2045 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2046 unsigned long token)
2047 {
2048 struct kvm_s390_interrupt inti;
2049 struct kvm_s390_irq irq;
2050
2051 if (start_token) {
2052 irq.u.ext.ext_params2 = token;
2053 irq.type = KVM_S390_INT_PFAULT_INIT;
2054 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2055 } else {
2056 inti.type = KVM_S390_INT_PFAULT_DONE;
2057 inti.parm64 = token;
2058 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2059 }
2060 }
2061
2062 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2063 struct kvm_async_pf *work)
2064 {
2065 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2066 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2067 }
2068
2069 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2070 struct kvm_async_pf *work)
2071 {
2072 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2073 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2074 }
2075
2076 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2077 struct kvm_async_pf *work)
2078 {
2079 /* s390 will always inject the page directly */
2080 }
2081
2082 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2083 {
2084 /*
2085 * s390 will always inject the page directly,
2086 * but we still want kvm_check_async_pf_completion() to clean up
2087 */
2088 return true;
2089 }
2090
2091 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2092 {
2093 hva_t hva;
2094 struct kvm_arch_async_pf arch;
2095 int rc;
2096
2097 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2098 return 0;
2099 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2100 vcpu->arch.pfault_compare)
2101 return 0;
2102 if (psw_extint_disabled(vcpu))
2103 return 0;
2104 if (kvm_s390_vcpu_has_irq(vcpu, 0))
2105 return 0;
2106 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2107 return 0;
2108 if (!vcpu->arch.gmap->pfault_enabled)
2109 return 0;
2110
2111 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2112 hva += current->thread.gmap_addr & ~PAGE_MASK;
2113 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2114 return 0;
2115
2116 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2117 return rc;
2118 }
2119
2120 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2121 {
2122 int rc, cpuflags;
2123
2124 /*
2125 * On s390 notifications for arriving pages will be delivered directly
2126 * to the guest, but the housekeeping for completed pfaults is
2127 * handled outside the worker.
2128 */
2129 kvm_check_async_pf_completion(vcpu);
2130
2131 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2132 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2133
2134 if (need_resched())
2135 schedule();
2136
2137 if (test_cpu_flag(CIF_MCCK_PENDING))
2138 s390_handle_mcck();
2139
2140 if (!kvm_is_ucontrol(vcpu->kvm)) {
2141 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2142 if (rc)
2143 return rc;
2144 }
2145
2146 rc = kvm_s390_handle_requests(vcpu);
2147 if (rc)
2148 return rc;
2149
2150 if (guestdbg_enabled(vcpu)) {
2151 kvm_s390_backup_guest_per_regs(vcpu);
2152 kvm_s390_patch_guest_per_regs(vcpu);
2153 }
2154
2155 vcpu->arch.sie_block->icptcode = 0;
2156 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2157 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2158 trace_kvm_s390_sie_enter(vcpu, cpuflags);
2159
2160 return 0;
2161 }
2162
2163 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2164 {
2165 struct kvm_s390_pgm_info pgm_info = {
2166 .code = PGM_ADDRESSING,
2167 };
2168 u8 opcode, ilen;
2169 int rc;
2170
2171 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2172 trace_kvm_s390_sie_fault(vcpu);
2173
2174 /*
2175 * We want to inject an addressing exception, which is defined as a
2176 * suppressing or terminating exception. However, since we came here
2177 * by a DAT access exception, the PSW still points to the faulting
2178 * instruction since DAT exceptions are nullifying. So we've got
2179 * to look up the current opcode to get the length of the instruction
2180 * to be able to forward the PSW.
2181 */
2182 rc = read_guest_instr(vcpu, &opcode, 1);
2183 ilen = insn_length(opcode);
2184 if (rc < 0) {
2185 return rc;
2186 } else if (rc) {
2187 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2188 * Forward by an arbitrary ilc; injection will take care of
2189 * nullification if necessary.
2190 */
2191 pgm_info = vcpu->arch.pgm;
2192 ilen = 4;
2193 }
2194 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2195 kvm_s390_forward_psw(vcpu, ilen);
2196 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2197 }
2198
2199 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2200 {
2201 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2202 vcpu->arch.sie_block->icptcode);
2203 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2204
2205 if (guestdbg_enabled(vcpu))
2206 kvm_s390_restore_guest_per_regs(vcpu);
2207
2208 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2209 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2210
2211 if (vcpu->arch.sie_block->icptcode > 0) {
2212 int rc = kvm_handle_sie_intercept(vcpu);
2213
2214 if (rc != -EOPNOTSUPP)
2215 return rc;
2216 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2217 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2218 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2219 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2220 return -EREMOTE;
2221 } else if (exit_reason != -EFAULT) {
2222 vcpu->stat.exit_null++;
2223 return 0;
2224 } else if (kvm_is_ucontrol(vcpu->kvm)) {
2225 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2226 vcpu->run->s390_ucontrol.trans_exc_code =
2227 current->thread.gmap_addr;
2228 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2229 return -EREMOTE;
2230 } else if (current->thread.gmap_pfault) {
2231 trace_kvm_s390_major_guest_pfault(vcpu);
2232 current->thread.gmap_pfault = 0;
2233 if (kvm_arch_setup_async_pf(vcpu))
2234 return 0;
2235 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2236 }
2237 return vcpu_post_run_fault_in_sie(vcpu);
2238 }
2239
2240 static int __vcpu_run(struct kvm_vcpu *vcpu)
2241 {
2242 int rc, exit_reason;
2243
2244 /*
2245 * We try to hold kvm->srcu during most of vcpu_run (except when
2246 * running the guest), so that memslots (and other stuff) are protected.
2247 */
2248 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2249
2250 do {
2251 rc = vcpu_pre_run(vcpu);
2252 if (rc)
2253 break;
2254
2255 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2256 /*
2257 * As PF_VCPU will be used in the fault handler, there must be
2258 * no uaccess between guest_enter and guest_exit.
2259 */
2260 local_irq_disable();
2261 __kvm_guest_enter();
2262 local_irq_enable();
2263 exit_reason = sie64a(vcpu->arch.sie_block,
2264 vcpu->run->s.regs.gprs);
2265 local_irq_disable();
2266 __kvm_guest_exit();
2267 local_irq_enable();
2268 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2269
2270 rc = vcpu_post_run(vcpu, exit_reason);
2271 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2272
2273 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2274 return rc;
2275 }
2276
2277 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2278 {
2279 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2280 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2281 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2282 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2283 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2284 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2285 /* some control register changes require a tlb flush */
2286 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2287 }
2288 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2289 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2290 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2291 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2292 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2293 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2294 }
2295 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2296 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2297 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2298 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2299 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2300 kvm_clear_async_pf_completion_queue(vcpu);
2301 }
2302 kvm_run->kvm_dirty_regs = 0;
2303 }
2304
2305 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2306 {
2307 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2308 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2309 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2310 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2311 kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2312 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2313 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2314 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2315 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2316 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2317 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2318 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2319 }
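/*
 * Illustrative userspace sketch (not part of this file): the synced
 * register interface that sync_regs()/store_regs() implement.  The PSW
 * is always transferred; the other register blocks are only read back
 * into the VCPU when the matching dirty bit is set before KVM_RUN.
 * mmap_size stands for the value returned by KVM_GET_VCPU_MMAP_SIZE.
 *
 *	struct kvm_run *run = mmap(NULL, mmap_size,
 *				   PROT_READ | PROT_WRITE, MAP_SHARED,
 *				   vcpu_fd, 0);
 *	run->s.regs.prefix = new_prefix;	// made-up prefix value
 *	run->kvm_dirty_regs |= KVM_SYNC_PREFIX;
 *	ioctl(vcpu_fd, KVM_RUN, 0);
 */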
2320
2321 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2322 {
2323 int rc;
2324 sigset_t sigsaved;
2325
2326 if (guestdbg_exit_pending(vcpu)) {
2327 kvm_s390_prepare_debug_exit(vcpu);
2328 return 0;
2329 }
2330
2331 if (vcpu->sigset_active)
2332 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2333
2334 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2335 kvm_s390_vcpu_start(vcpu);
2336 } else if (is_vcpu_stopped(vcpu)) {
2337 pr_err_ratelimited("can't run stopped vcpu %d\n",
2338 vcpu->vcpu_id);
2339 return -EINVAL;
2340 }
2341
2342 sync_regs(vcpu, kvm_run);
2343
2344 might_fault();
2345 rc = __vcpu_run(vcpu);
2346
2347 if (signal_pending(current) && !rc) {
2348 kvm_run->exit_reason = KVM_EXIT_INTR;
2349 rc = -EINTR;
2350 }
2351
2352 if (guestdbg_exit_pending(vcpu) && !rc) {
2353 kvm_s390_prepare_debug_exit(vcpu);
2354 rc = 0;
2355 }
2356
2357 if (rc == -EREMOTE) {
2358 /* userspace support is needed; kvm_run has been prepared */
2359 rc = 0;
2360 }
2361
2362 store_regs(vcpu, kvm_run);
2363
2364 if (vcpu->sigset_active)
2365 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2366
2367 vcpu->stat.exit_userspace++;
2368 return rc;
2369 }
2370
2371 /*
2372 * store status at address
2373 * we have two special cases:
2374 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2375 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2376 */
2377 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2378 {
2379 unsigned char archmode = 1;
2380 freg_t fprs[NUM_FPRS];
2381 unsigned int px;
2382 u64 clkcomp;
2383 int rc;
2384
2385 px = kvm_s390_get_prefix(vcpu);
2386 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2387 if (write_guest_abs(vcpu, 163, &archmode, 1))
2388 return -EFAULT;
2389 gpa = 0;
2390 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2391 if (write_guest_real(vcpu, 163, &archmode, 1))
2392 return -EFAULT;
2393 gpa = px;
2394 } else
2395 gpa -= __LC_FPREGS_SAVE_AREA;
2396
2397 /* manually convert vector registers if necessary */
2398 if (MACHINE_HAS_VX) {
2399 convert_vx_to_fp(fprs, current->thread.fpu.vxrs);
2400 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2401 fprs, 128);
2402 } else {
2403 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2404 vcpu->run->s.regs.fprs, 128);
2405 }
2406 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2407 vcpu->run->s.regs.gprs, 128);
2408 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2409 &vcpu->arch.sie_block->gpsw, 16);
2410 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2411 &px, 4);
2412 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2413 &vcpu->run->s.regs.fpc, 4);
2414 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2415 &vcpu->arch.sie_block->todpr, 4);
2416 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2417 &vcpu->arch.sie_block->cputm, 8);
2418 clkcomp = vcpu->arch.sie_block->ckc >> 8;
2419 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2420 &clkcomp, 8);
2421 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2422 &vcpu->run->s.regs.acrs, 64);
2423 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2424 &vcpu->arch.sie_block->gcr, 128);
2425 return rc ? -EFAULT : 0;
2426 }
2427
2428 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2429 {
2430 /*
2431 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2432 * copying in vcpu load/put. Let's update our copies before we save
2433 * them into the save area.
2434 */
2435 save_fpu_regs();
2436 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2437 save_access_regs(vcpu->run->s.regs.acrs);
2438
2439 return kvm_s390_store_status_unloaded(vcpu, addr);
2440 }
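/*
 * Illustrative userspace sketch (not part of this file): the argument
 * of the KVM_S390_STORE_STATUS ioctl is the target address itself, so
 * the special encodings documented above can be passed directly:
 *
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_NOADDR);
 */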
2441
2442 /*
2443 * store additional status at address
2444 */
2445 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2446 unsigned long gpa)
2447 {
2448 /* Only bits 0-53 are used for address formation */
2449 if (!(gpa & ~0x3ff))
2450 return 0;
2451
2452 return write_guest_abs(vcpu, gpa & ~0x3ff,
2453 (void *)&vcpu->run->s.regs.vrs, 512);
2454 }
2455
2456 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2457 {
2458 if (!test_kvm_facility(vcpu->kvm, 129))
2459 return 0;
2460
2461 /*
2462 * The guest VXRS are in the host VXRS due to the lazy
2463 * copying in vcpu load/put. We can simply call save_fpu_regs()
2464 * to save the current register state because we are in the
2465 * middle of a load/put cycle.
2466 *
2467 * Let's update our copies before we save them into the save area.
2468 */
2469 save_fpu_regs();
2470
2471 return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2472 }
2473
2474 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2475 {
2476 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2477 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2478 }
2479
2480 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2481 {
2482 unsigned int i;
2483 struct kvm_vcpu *vcpu;
2484
2485 kvm_for_each_vcpu(i, vcpu, kvm) {
2486 __disable_ibs_on_vcpu(vcpu);
2487 }
2488 }
2489
2490 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2491 {
2492 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2493 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2494 }
2495
2496 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2497 {
2498 int i, online_vcpus, started_vcpus = 0;
2499
2500 if (!is_vcpu_stopped(vcpu))
2501 return;
2502
2503 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2504 /* Only one cpu at a time may enter/leave the STOPPED state. */
2505 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2506 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2507
2508 for (i = 0; i < online_vcpus; i++) {
2509 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2510 started_vcpus++;
2511 }
2512
2513 if (started_vcpus == 0) {
2514 /* we're the only active VCPU -> speed it up */
2515 __enable_ibs_on_vcpu(vcpu);
2516 } else if (started_vcpus == 1) {
2517 /*
2518 * As we are starting a second VCPU, we have to disable
2519 * the IBS facility on all VCPUs to remove potentially
2520 * outstanding ENABLE requests.
2521 */
2522 __disable_ibs_on_all_vcpus(vcpu->kvm);
2523 }
2524
2525 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2526 /*
2527 * Another VCPU might have used IBS while we were offline.
2528 * Let's play safe and flush the VCPU at startup.
2529 */
2530 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2531 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2532 return;
2533 }
2534
2535 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2536 {
2537 int i, online_vcpus, started_vcpus = 0;
2538 struct kvm_vcpu *started_vcpu = NULL;
2539
2540 if (is_vcpu_stopped(vcpu))
2541 return;
2542
2543 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2544 /* Only one cpu at a time may enter/leave the STOPPED state. */
2545 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2546 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2547
2548 /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2549 kvm_s390_clear_stop_irq(vcpu);
2550
2551 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2552 __disable_ibs_on_vcpu(vcpu);
2553
2554 for (i = 0; i < online_vcpus; i++) {
2555 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2556 started_vcpus++;
2557 started_vcpu = vcpu->kvm->vcpus[i];
2558 }
2559 }
2560
2561 if (started_vcpus == 1) {
2562 /*
2563 * As we only have one VCPU left, we want to enable the
2564 * IBS facility for that VCPU to speed it up.
2565 */
2566 __enable_ibs_on_vcpu(started_vcpu);
2567 }
2568
2569 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2570 return;
2571 }
2572
2573 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2574 struct kvm_enable_cap *cap)
2575 {
2576 int r;
2577
2578 if (cap->flags)
2579 return -EINVAL;
2580
2581 switch (cap->cap) {
2582 case KVM_CAP_S390_CSS_SUPPORT:
2583 if (!vcpu->kvm->arch.css_support) {
2584 vcpu->kvm->arch.css_support = 1;
2585 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2586 trace_kvm_s390_enable_css(vcpu->kvm);
2587 }
2588 r = 0;
2589 break;
2590 default:
2591 r = -EINVAL;
2592 break;
2593 }
2594 return r;
2595 }
2596
2597 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2598 struct kvm_s390_mem_op *mop)
2599 {
2600 void __user *uaddr = (void __user *)mop->buf;
2601 void *tmpbuf = NULL;
2602 int r, srcu_idx;
2603 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2604 | KVM_S390_MEMOP_F_CHECK_ONLY;
2605
2606 if (mop->flags & ~supported_flags)
2607 return -EINVAL;
2608
2609 if (mop->size > MEM_OP_MAX_SIZE)
2610 return -E2BIG;
2611
2612 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2613 tmpbuf = vmalloc(mop->size);
2614 if (!tmpbuf)
2615 return -ENOMEM;
2616 }
2617
2618 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2619
2620 switch (mop->op) {
2621 case KVM_S390_MEMOP_LOGICAL_READ:
2622 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2623 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2624 mop->size, GACC_FETCH);
2625 break;
2626 }
2627 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2628 if (r == 0) {
2629 if (copy_to_user(uaddr, tmpbuf, mop->size))
2630 r = -EFAULT;
2631 }
2632 break;
2633 case KVM_S390_MEMOP_LOGICAL_WRITE:
2634 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2635 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2636 mop->size, GACC_STORE);
2637 break;
2638 }
2639 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2640 r = -EFAULT;
2641 break;
2642 }
2643 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2644 break;
2645 default:
2646 r = -EINVAL;
2647 }
2648
2649 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2650
2651 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2652 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2653
2654 vfree(tmpbuf);
2655 return r;
2656 }
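/*
 * Illustrative userspace sketch (not part of this file): reading guest
 * memory through KVM_S390_MEM_OP as handled above.  With
 * KVM_S390_MEMOP_F_CHECK_ONLY set instead, accessibility would be
 * verified without copying any data.
 *
 *	__u8 buf[512];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x1000,	// made-up guest logical address
 *		.size = sizeof(buf),
 *		.op = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf = (__u64)(uintptr_t)buf,
 *		.ar = 0,		// access register 0
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		perror("KVM_S390_MEM_OP");
 */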
2657
2658 long kvm_arch_vcpu_ioctl(struct file *filp,
2659 unsigned int ioctl, unsigned long arg)
2660 {
2661 struct kvm_vcpu *vcpu = filp->private_data;
2662 void __user *argp = (void __user *)arg;
2663 int idx;
2664 long r;
2665
2666 switch (ioctl) {
2667 case KVM_S390_IRQ: {
2668 struct kvm_s390_irq s390irq;
2669
2670 r = -EFAULT;
2671 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2672 break;
2673 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2674 break;
2675 }
2676 case KVM_S390_INTERRUPT: {
2677 struct kvm_s390_interrupt s390int;
2678 struct kvm_s390_irq s390irq;
2679
2680 r = -EFAULT;
2681 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2682 break;
2683 if (s390int_to_s390irq(&s390int, &s390irq))
2684 return -EINVAL;
2685 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2686 break;
2687 }
2688 case KVM_S390_STORE_STATUS:
2689 idx = srcu_read_lock(&vcpu->kvm->srcu);
2690 r = kvm_s390_vcpu_store_status(vcpu, arg);
2691 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2692 break;
2693 case KVM_S390_SET_INITIAL_PSW: {
2694 psw_t psw;
2695
2696 r = -EFAULT;
2697 if (copy_from_user(&psw, argp, sizeof(psw)))
2698 break;
2699 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2700 break;
2701 }
2702 case KVM_S390_INITIAL_RESET:
2703 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2704 break;
2705 case KVM_SET_ONE_REG:
2706 case KVM_GET_ONE_REG: {
2707 struct kvm_one_reg reg;
2708 r = -EFAULT;
2709 if (copy_from_user(&reg, argp, sizeof(reg)))
2710 break;
2711 if (ioctl == KVM_SET_ONE_REG)
2712 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2713 else
2714 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2715 break;
2716 }
2717 #ifdef CONFIG_KVM_S390_UCONTROL
2718 case KVM_S390_UCAS_MAP: {
2719 struct kvm_s390_ucas_mapping ucasmap;
2720
2721 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2722 r = -EFAULT;
2723 break;
2724 }
2725
2726 if (!kvm_is_ucontrol(vcpu->kvm)) {
2727 r = -EINVAL;
2728 break;
2729 }
2730
2731 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2732 ucasmap.vcpu_addr, ucasmap.length);
2733 break;
2734 }
2735 case KVM_S390_UCAS_UNMAP: {
2736 struct kvm_s390_ucas_mapping ucasmap;
2737
2738 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2739 r = -EFAULT;
2740 break;
2741 }
2742
2743 if (!kvm_is_ucontrol(vcpu->kvm)) {
2744 r = -EINVAL;
2745 break;
2746 }
2747
2748 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2749 ucasmap.length);
2750 break;
2751 }
2752 #endif
2753 case KVM_S390_VCPU_FAULT: {
2754 r = gmap_fault(vcpu->arch.gmap, arg, 0);
2755 break;
2756 }
2757 case KVM_ENABLE_CAP:
2758 {
2759 struct kvm_enable_cap cap;
2760 r = -EFAULT;
2761 if (copy_from_user(&cap, argp, sizeof(cap)))
2762 break;
2763 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2764 break;
2765 }
2766 case KVM_S390_MEM_OP: {
2767 struct kvm_s390_mem_op mem_op;
2768
2769 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2770 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2771 else
2772 r = -EFAULT;
2773 break;
2774 }
2775 case KVM_S390_SET_IRQ_STATE: {
2776 struct kvm_s390_irq_state irq_state;
2777
2778 r = -EFAULT;
2779 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2780 break;
2781 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2782 irq_state.len == 0 ||
2783 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2784 r = -EINVAL;
2785 break;
2786 }
2787 r = kvm_s390_set_irq_state(vcpu,
2788 (void __user *) irq_state.buf,
2789 irq_state.len);
2790 break;
2791 }
2792 case KVM_S390_GET_IRQ_STATE: {
2793 struct kvm_s390_irq_state irq_state;
2794
2795 r = -EFAULT;
2796 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2797 break;
2798 if (irq_state.len == 0) {
2799 r = -EINVAL;
2800 break;
2801 }
2802 r = kvm_s390_get_irq_state(vcpu,
2803 (__u8 __user *) irq_state.buf,
2804 irq_state.len);
2805 break;
2806 }
2807 default:
2808 r = -ENOTTY;
2809 }
2810 return r;
2811 }
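/*
 * Illustrative userspace sketch (not part of this file): saving the
 * pending interrupt state with KVM_S390_GET_IRQ_STATE.  The buffer
 * capacity is a made-up value; the SET path above caps the length at
 * VCPU_IRQS_MAX_BUF.
 *
 *	struct kvm_s390_irq irqs[64];	// made-up capacity
 *	struct kvm_s390_irq_state st = {
 *		.buf = (__u64)(uintptr_t)irqs,
 *		.len = sizeof(irqs),
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_GET_IRQ_STATE, &st) < 0)
 *		perror("KVM_S390_GET_IRQ_STATE");
 */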
2812
2813 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2814 {
2815 #ifdef CONFIG_KVM_S390_UCONTROL
2816 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2817 && (kvm_is_ucontrol(vcpu->kvm))) {
2818 vmf->page = virt_to_page(vcpu->arch.sie_block);
2819 get_page(vmf->page);
2820 return 0;
2821 }
2822 #endif
2823 return VM_FAULT_SIGBUS;
2824 }
2825
2826 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2827 unsigned long npages)
2828 {
2829 return 0;
2830 }
2831
2832 /* Section: memory related */
2833 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2834 struct kvm_memory_slot *memslot,
2835 const struct kvm_userspace_memory_region *mem,
2836 enum kvm_mr_change change)
2837 {
2838 /* A few sanity checks. Memory slots have to start and end at a
2839 segment boundary (1MB). The backing memory in userland may be
2840 fragmented into several different vmas, and it is fine to mmap()
2841 and munmap() within the slot at any time after this call. */
2842
2843 if (mem->userspace_addr & 0xffffful)
2844 return -EINVAL;
2845
2846 if (mem->memory_size & 0xffffful)
2847 return -EINVAL;
2848
2849 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
2850 return -EINVAL;
2851
2852 return 0;
2853 }
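/*
 * Illustrative userspace sketch (not part of this file): a memory slot
 * that passes the checks above.  Size and both addresses are 1MB
 * aligned; for brevity the sketch assumes mmap() happens to return a
 * segment-aligned address, which real code would have to arrange.
 *
 *	size_t size = 256UL << 20;	// made-up slot size
 *	void *mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *			 MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
 *	struct kvm_userspace_memory_region region = {
 *		.slot = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size = size,
 *		.userspace_addr = (__u64)(uintptr_t)mem,
 *	};
 *	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 */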
2854
2855 void kvm_arch_commit_memory_region(struct kvm *kvm,
2856 const struct kvm_userspace_memory_region *mem,
2857 const struct kvm_memory_slot *old,
2858 const struct kvm_memory_slot *new,
2859 enum kvm_mr_change change)
2860 {
2861 int rc;
2862
2863 /* If the basics of the memslot do not change, we do not want
2864 * to update the gmap. Every update causes several unnecessary
2865 * segment translation exceptions. This is usually handled just
2866 * fine by the normal fault handler + gmap, but it will also
2867 * cause faults on the prefix page of running guest CPUs.
2868 */
2869 if (old->userspace_addr == mem->userspace_addr &&
2870 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2871 old->npages * PAGE_SIZE == mem->memory_size)
2872 return;
2873
2874 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2875 mem->guest_phys_addr, mem->memory_size);
2876 if (rc)
2877 pr_warn("failed to commit memory region\n");
2878 return;
2879 }
2880
2881 static int __init kvm_s390_init(void)
2882 {
2883 if (!sclp.has_sief2) {
2884 pr_info("SIE not available\n");
2885 return -ENODEV;
2886 }
2887
2888 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2889 }
2890
2891 static void __exit kvm_s390_exit(void)
2892 {
2893 kvm_exit();
2894 }
2895
2896 module_init(kvm_s390_init);
2897 module_exit(kvm_s390_exit);
2898
2899 /*
2900 * Enable autoloading of the kvm module.
2901 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2902 * since x86 takes a different approach.
2903 */
2904 #include <linux/miscdevice.h>
2905 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2906 MODULE_ALIAS("devname:kvm");