1 // SPDX-License-Identifier: GPL-2.0
3 * hosting IBM Z kernel virtual machines (s390x)
5 * Copyright IBM Corp. 2008, 2020
7 * Author(s): Carsten Otte <cotte@de.ibm.com>
8 * Christian Borntraeger <borntraeger@de.ibm.com>
9 * Heiko Carstens <heiko.carstens@de.ibm.com>
10 * Christian Ehrhardt <ehrhardt@de.ibm.com>
11 * Jason J. Herne <jjherne@us.ibm.com>
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17 #include <linux/compiler.h>
18 #include <linux/err.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
41 #include <asm/switch_to.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
48 #include <asm/fpu/api.h>
52 #define CREATE_TRACE_POINTS
54 #include "trace-s390.h"
56 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
58 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
59 (KVM_MAX_VCPUS + LOCAL_IRQS))
61 const struct _kvm_stats_desc kvm_vm_stats_desc
[] = {
62 KVM_GENERIC_VM_STATS(),
63 STATS_DESC_COUNTER(VM
, inject_io
),
64 STATS_DESC_COUNTER(VM
, inject_float_mchk
),
65 STATS_DESC_COUNTER(VM
, inject_pfault_done
),
66 STATS_DESC_COUNTER(VM
, inject_service_signal
),
67 STATS_DESC_COUNTER(VM
, inject_virtio
)
70 const struct kvm_stats_header kvm_vm_stats_header
= {
71 .name_size
= KVM_STATS_NAME_SIZE
,
72 .num_desc
= ARRAY_SIZE(kvm_vm_stats_desc
),
73 .id_offset
= sizeof(struct kvm_stats_header
),
74 .desc_offset
= sizeof(struct kvm_stats_header
) + KVM_STATS_NAME_SIZE
,
75 .data_offset
= sizeof(struct kvm_stats_header
) + KVM_STATS_NAME_SIZE
+
76 sizeof(kvm_vm_stats_desc
),
79 const struct _kvm_stats_desc kvm_vcpu_stats_desc
[] = {
80 KVM_GENERIC_VCPU_STATS(),
81 STATS_DESC_COUNTER(VCPU
, exit_userspace
),
82 STATS_DESC_COUNTER(VCPU
, exit_null
),
83 STATS_DESC_COUNTER(VCPU
, exit_external_request
),
84 STATS_DESC_COUNTER(VCPU
, exit_io_request
),
85 STATS_DESC_COUNTER(VCPU
, exit_external_interrupt
),
86 STATS_DESC_COUNTER(VCPU
, exit_stop_request
),
87 STATS_DESC_COUNTER(VCPU
, exit_validity
),
88 STATS_DESC_COUNTER(VCPU
, exit_instruction
),
89 STATS_DESC_COUNTER(VCPU
, exit_pei
),
90 STATS_DESC_COUNTER(VCPU
, halt_no_poll_steal
),
91 STATS_DESC_COUNTER(VCPU
, instruction_lctl
),
92 STATS_DESC_COUNTER(VCPU
, instruction_lctlg
),
93 STATS_DESC_COUNTER(VCPU
, instruction_stctl
),
94 STATS_DESC_COUNTER(VCPU
, instruction_stctg
),
95 STATS_DESC_COUNTER(VCPU
, exit_program_interruption
),
96 STATS_DESC_COUNTER(VCPU
, exit_instr_and_program
),
97 STATS_DESC_COUNTER(VCPU
, exit_operation_exception
),
98 STATS_DESC_COUNTER(VCPU
, deliver_ckc
),
99 STATS_DESC_COUNTER(VCPU
, deliver_cputm
),
100 STATS_DESC_COUNTER(VCPU
, deliver_external_call
),
101 STATS_DESC_COUNTER(VCPU
, deliver_emergency_signal
),
102 STATS_DESC_COUNTER(VCPU
, deliver_service_signal
),
103 STATS_DESC_COUNTER(VCPU
, deliver_virtio
),
104 STATS_DESC_COUNTER(VCPU
, deliver_stop_signal
),
105 STATS_DESC_COUNTER(VCPU
, deliver_prefix_signal
),
106 STATS_DESC_COUNTER(VCPU
, deliver_restart_signal
),
107 STATS_DESC_COUNTER(VCPU
, deliver_program
),
108 STATS_DESC_COUNTER(VCPU
, deliver_io
),
109 STATS_DESC_COUNTER(VCPU
, deliver_machine_check
),
110 STATS_DESC_COUNTER(VCPU
, exit_wait_state
),
111 STATS_DESC_COUNTER(VCPU
, inject_ckc
),
112 STATS_DESC_COUNTER(VCPU
, inject_cputm
),
113 STATS_DESC_COUNTER(VCPU
, inject_external_call
),
114 STATS_DESC_COUNTER(VCPU
, inject_emergency_signal
),
115 STATS_DESC_COUNTER(VCPU
, inject_mchk
),
116 STATS_DESC_COUNTER(VCPU
, inject_pfault_init
),
117 STATS_DESC_COUNTER(VCPU
, inject_program
),
118 STATS_DESC_COUNTER(VCPU
, inject_restart
),
119 STATS_DESC_COUNTER(VCPU
, inject_set_prefix
),
120 STATS_DESC_COUNTER(VCPU
, inject_stop_signal
),
121 STATS_DESC_COUNTER(VCPU
, instruction_epsw
),
122 STATS_DESC_COUNTER(VCPU
, instruction_gs
),
123 STATS_DESC_COUNTER(VCPU
, instruction_io_other
),
124 STATS_DESC_COUNTER(VCPU
, instruction_lpsw
),
125 STATS_DESC_COUNTER(VCPU
, instruction_lpswe
),
126 STATS_DESC_COUNTER(VCPU
, instruction_pfmf
),
127 STATS_DESC_COUNTER(VCPU
, instruction_ptff
),
128 STATS_DESC_COUNTER(VCPU
, instruction_sck
),
129 STATS_DESC_COUNTER(VCPU
, instruction_sckpf
),
130 STATS_DESC_COUNTER(VCPU
, instruction_stidp
),
131 STATS_DESC_COUNTER(VCPU
, instruction_spx
),
132 STATS_DESC_COUNTER(VCPU
, instruction_stpx
),
133 STATS_DESC_COUNTER(VCPU
, instruction_stap
),
134 STATS_DESC_COUNTER(VCPU
, instruction_iske
),
135 STATS_DESC_COUNTER(VCPU
, instruction_ri
),
136 STATS_DESC_COUNTER(VCPU
, instruction_rrbe
),
137 STATS_DESC_COUNTER(VCPU
, instruction_sske
),
138 STATS_DESC_COUNTER(VCPU
, instruction_ipte_interlock
),
139 STATS_DESC_COUNTER(VCPU
, instruction_stsi
),
140 STATS_DESC_COUNTER(VCPU
, instruction_stfl
),
141 STATS_DESC_COUNTER(VCPU
, instruction_tb
),
142 STATS_DESC_COUNTER(VCPU
, instruction_tpi
),
143 STATS_DESC_COUNTER(VCPU
, instruction_tprot
),
144 STATS_DESC_COUNTER(VCPU
, instruction_tsch
),
145 STATS_DESC_COUNTER(VCPU
, instruction_sie
),
146 STATS_DESC_COUNTER(VCPU
, instruction_essa
),
147 STATS_DESC_COUNTER(VCPU
, instruction_sthyi
),
148 STATS_DESC_COUNTER(VCPU
, instruction_sigp_sense
),
149 STATS_DESC_COUNTER(VCPU
, instruction_sigp_sense_running
),
150 STATS_DESC_COUNTER(VCPU
, instruction_sigp_external_call
),
151 STATS_DESC_COUNTER(VCPU
, instruction_sigp_emergency
),
152 STATS_DESC_COUNTER(VCPU
, instruction_sigp_cond_emergency
),
153 STATS_DESC_COUNTER(VCPU
, instruction_sigp_start
),
154 STATS_DESC_COUNTER(VCPU
, instruction_sigp_stop
),
155 STATS_DESC_COUNTER(VCPU
, instruction_sigp_stop_store_status
),
156 STATS_DESC_COUNTER(VCPU
, instruction_sigp_store_status
),
157 STATS_DESC_COUNTER(VCPU
, instruction_sigp_store_adtl_status
),
158 STATS_DESC_COUNTER(VCPU
, instruction_sigp_arch
),
159 STATS_DESC_COUNTER(VCPU
, instruction_sigp_prefix
),
160 STATS_DESC_COUNTER(VCPU
, instruction_sigp_restart
),
161 STATS_DESC_COUNTER(VCPU
, instruction_sigp_init_cpu_reset
),
162 STATS_DESC_COUNTER(VCPU
, instruction_sigp_cpu_reset
),
163 STATS_DESC_COUNTER(VCPU
, instruction_sigp_unknown
),
164 STATS_DESC_COUNTER(VCPU
, instruction_diagnose_10
),
165 STATS_DESC_COUNTER(VCPU
, instruction_diagnose_44
),
166 STATS_DESC_COUNTER(VCPU
, instruction_diagnose_9c
),
167 STATS_DESC_COUNTER(VCPU
, diag_9c_ignored
),
168 STATS_DESC_COUNTER(VCPU
, diag_9c_forward
),
169 STATS_DESC_COUNTER(VCPU
, instruction_diagnose_258
),
170 STATS_DESC_COUNTER(VCPU
, instruction_diagnose_308
),
171 STATS_DESC_COUNTER(VCPU
, instruction_diagnose_500
),
172 STATS_DESC_COUNTER(VCPU
, instruction_diagnose_other
),
173 STATS_DESC_COUNTER(VCPU
, pfault_sync
)
176 const struct kvm_stats_header kvm_vcpu_stats_header
= {
177 .name_size
= KVM_STATS_NAME_SIZE
,
178 .num_desc
= ARRAY_SIZE(kvm_vcpu_stats_desc
),
179 .id_offset
= sizeof(struct kvm_stats_header
),
180 .desc_offset
= sizeof(struct kvm_stats_header
) + KVM_STATS_NAME_SIZE
,
181 .data_offset
= sizeof(struct kvm_stats_header
) + KVM_STATS_NAME_SIZE
+
182 sizeof(kvm_vcpu_stats_desc
),
185 /* allow nested virtualization in KVM (if enabled by user space) */
187 module_param(nested
, int, S_IRUGO
);
188 MODULE_PARM_DESC(nested
, "Nested virtualization support");
190 /* allow 1m huge page guest backing, if !nested */
192 module_param(hpage
, int, 0444);
193 MODULE_PARM_DESC(hpage
, "1m huge page backing support");
195 /* maximum percentage of steal time for polling. >100 is treated like 100 */
196 static u8 halt_poll_max_steal
= 10;
197 module_param(halt_poll_max_steal
, byte
, 0644);
198 MODULE_PARM_DESC(halt_poll_max_steal
, "Maximum percentage of steal time to allow polling");
200 /* if set to true, the GISA will be initialized and used if available */
201 static bool use_gisa
= true;
202 module_param(use_gisa
, bool, 0644);
203 MODULE_PARM_DESC(use_gisa
, "Use the GISA if the host supports it.");
205 /* maximum diag9c forwarding per second */
206 unsigned int diag9c_forwarding_hz
;
207 module_param(diag9c_forwarding_hz
, uint
, 0644);
208 MODULE_PARM_DESC(diag9c_forwarding_hz
, "Maximum diag9c forwarding per second, 0 to turn off");
211 * For now we handle at most 16 double words as this is what the s390 base
212 * kernel handles and stores in the prefix page. If we ever need to go beyond
213 * this, this requires changes to code, but the external uapi can stay.
215 #define SIZE_INTERNAL 16
218 * Base feature mask that defines default mask for facilities. Consists of the
219 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
221 static unsigned long kvm_s390_fac_base
[SIZE_INTERNAL
] = { FACILITIES_KVM
};
223 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
224 * and defines the facilities that can be enabled via a cpu model.
226 static unsigned long kvm_s390_fac_ext
[SIZE_INTERNAL
] = { FACILITIES_KVM_CPUMODEL
};
228 static unsigned long kvm_s390_fac_size(void)
230 BUILD_BUG_ON(SIZE_INTERNAL
> S390_ARCH_FAC_MASK_SIZE_U64
);
231 BUILD_BUG_ON(SIZE_INTERNAL
> S390_ARCH_FAC_LIST_SIZE_U64
);
232 BUILD_BUG_ON(SIZE_INTERNAL
* sizeof(unsigned long) >
233 sizeof(stfle_fac_list
));
235 return SIZE_INTERNAL
;
238 /* available cpu features supported by kvm */
239 static DECLARE_BITMAP(kvm_s390_available_cpu_feat
, KVM_S390_VM_CPU_FEAT_NR_BITS
);
240 /* available subfunctions indicated via query / "test bit" */
241 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc
;
243 static struct gmap_notifier gmap_notifier
;
244 static struct gmap_notifier vsie_gmap_notifier
;
245 debug_info_t
*kvm_s390_dbf
;
246 debug_info_t
*kvm_s390_dbf_uv
;
248 /* Section: not file related */
249 int kvm_arch_hardware_enable(void)
251 /* every s390 is virtualization enabled ;-) */
255 int kvm_arch_check_processor_compat(void *opaque
)
260 /* forward declarations */
261 static void kvm_gmap_notifier(struct gmap
*gmap
, unsigned long start
,
263 static int sca_switch_to_extended(struct kvm
*kvm
);
265 static void kvm_clock_sync_scb(struct kvm_s390_sie_block
*scb
, u64 delta
)
270 * The TOD jumps by delta, we have to compensate this by adding
271 * -delta to the epoch.
275 /* sign-extension - we're adding to signed values below */
280 if (scb
->ecd
& ECD_MEF
) {
281 scb
->epdx
+= delta_idx
;
282 if (scb
->epoch
< delta
)
288 * This callback is executed during stop_machine(). All CPUs are therefore
289 * temporarily stopped. In order not to change guest behavior, we have to
290 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
291 * so a CPU won't be stopped while calculating with the epoch.
293 static int kvm_clock_sync(struct notifier_block
*notifier
, unsigned long val
,
297 struct kvm_vcpu
*vcpu
;
299 unsigned long long *delta
= v
;
301 list_for_each_entry(kvm
, &vm_list
, vm_list
) {
302 kvm_for_each_vcpu(i
, vcpu
, kvm
) {
303 kvm_clock_sync_scb(vcpu
->arch
.sie_block
, *delta
);
305 kvm
->arch
.epoch
= vcpu
->arch
.sie_block
->epoch
;
306 kvm
->arch
.epdx
= vcpu
->arch
.sie_block
->epdx
;
308 if (vcpu
->arch
.cputm_enabled
)
309 vcpu
->arch
.cputm_start
+= *delta
;
310 if (vcpu
->arch
.vsie_block
)
311 kvm_clock_sync_scb(vcpu
->arch
.vsie_block
,
318 static struct notifier_block kvm_clock_notifier
= {
319 .notifier_call
= kvm_clock_sync
,
322 int kvm_arch_hardware_setup(void *opaque
)
324 gmap_notifier
.notifier_call
= kvm_gmap_notifier
;
325 gmap_register_pte_notifier(&gmap_notifier
);
326 vsie_gmap_notifier
.notifier_call
= kvm_s390_vsie_gmap_notifier
;
327 gmap_register_pte_notifier(&vsie_gmap_notifier
);
328 atomic_notifier_chain_register(&s390_epoch_delta_notifier
,
329 &kvm_clock_notifier
);
333 void kvm_arch_hardware_unsetup(void)
335 gmap_unregister_pte_notifier(&gmap_notifier
);
336 gmap_unregister_pte_notifier(&vsie_gmap_notifier
);
337 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier
,
338 &kvm_clock_notifier
);
341 static void allow_cpu_feat(unsigned long nr
)
343 set_bit_inv(nr
, kvm_s390_available_cpu_feat
);
346 static inline int plo_test_bit(unsigned char nr
)
348 unsigned long function
= (unsigned long)nr
| 0x100;
352 " lgr 0,%[function]\n"
353 /* Parameter registers are ignored for "test bit" */
358 : [function
] "d" (function
)
363 static __always_inline
void __insn32_query(unsigned int opcode
, u8
*query
)
368 /* Parameter registers are ignored */
369 " .insn rrf,%[opc] << 16,2,4,6,0\n"
371 : [query
] "d" ((unsigned long)query
), [opc
] "i" (opcode
)
372 : "cc", "memory", "0", "1");
375 #define INSN_SORTL 0xb938
376 #define INSN_DFLTCC 0xb939
378 static void kvm_s390_cpu_feat_init(void)
382 for (i
= 0; i
< 256; ++i
) {
384 kvm_s390_available_subfunc
.plo
[i
>> 3] |= 0x80 >> (i
& 7);
387 if (test_facility(28)) /* TOD-clock steering */
388 ptff(kvm_s390_available_subfunc
.ptff
,
389 sizeof(kvm_s390_available_subfunc
.ptff
),
392 if (test_facility(17)) { /* MSA */
393 __cpacf_query(CPACF_KMAC
, (cpacf_mask_t
*)
394 kvm_s390_available_subfunc
.kmac
);
395 __cpacf_query(CPACF_KMC
, (cpacf_mask_t
*)
396 kvm_s390_available_subfunc
.kmc
);
397 __cpacf_query(CPACF_KM
, (cpacf_mask_t
*)
398 kvm_s390_available_subfunc
.km
);
399 __cpacf_query(CPACF_KIMD
, (cpacf_mask_t
*)
400 kvm_s390_available_subfunc
.kimd
);
401 __cpacf_query(CPACF_KLMD
, (cpacf_mask_t
*)
402 kvm_s390_available_subfunc
.klmd
);
404 if (test_facility(76)) /* MSA3 */
405 __cpacf_query(CPACF_PCKMO
, (cpacf_mask_t
*)
406 kvm_s390_available_subfunc
.pckmo
);
407 if (test_facility(77)) { /* MSA4 */
408 __cpacf_query(CPACF_KMCTR
, (cpacf_mask_t
*)
409 kvm_s390_available_subfunc
.kmctr
);
410 __cpacf_query(CPACF_KMF
, (cpacf_mask_t
*)
411 kvm_s390_available_subfunc
.kmf
);
412 __cpacf_query(CPACF_KMO
, (cpacf_mask_t
*)
413 kvm_s390_available_subfunc
.kmo
);
414 __cpacf_query(CPACF_PCC
, (cpacf_mask_t
*)
415 kvm_s390_available_subfunc
.pcc
);
417 if (test_facility(57)) /* MSA5 */
418 __cpacf_query(CPACF_PRNO
, (cpacf_mask_t
*)
419 kvm_s390_available_subfunc
.ppno
);
421 if (test_facility(146)) /* MSA8 */
422 __cpacf_query(CPACF_KMA
, (cpacf_mask_t
*)
423 kvm_s390_available_subfunc
.kma
);
425 if (test_facility(155)) /* MSA9 */
426 __cpacf_query(CPACF_KDSA
, (cpacf_mask_t
*)
427 kvm_s390_available_subfunc
.kdsa
);
429 if (test_facility(150)) /* SORTL */
430 __insn32_query(INSN_SORTL
, kvm_s390_available_subfunc
.sortl
);
432 if (test_facility(151)) /* DFLTCC */
433 __insn32_query(INSN_DFLTCC
, kvm_s390_available_subfunc
.dfltcc
);
435 if (MACHINE_HAS_ESOP
)
436 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP
);
438 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
439 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
441 if (!sclp
.has_sief2
|| !MACHINE_HAS_ESOP
|| !sclp
.has_64bscao
||
442 !test_facility(3) || !nested
)
444 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2
);
445 if (sclp
.has_64bscao
)
446 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO
);
448 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF
);
450 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE
);
452 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS
);
454 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB
);
456 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI
);
458 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS
);
460 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS
);
462 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
463 * all skey handling functions read/set the skey from the PGSTE
464 * instead of the real storage key.
466 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
467 * pages being detected as preserved although they are resident.
469 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
470 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
472 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
473 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
474 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
476 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
477 * cannot easily shadow the SCA because of the ipte lock.
481 int kvm_arch_init(void *opaque
)
485 kvm_s390_dbf
= debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
489 kvm_s390_dbf_uv
= debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
490 if (!kvm_s390_dbf_uv
)
493 if (debug_register_view(kvm_s390_dbf
, &debug_sprintf_view
) ||
494 debug_register_view(kvm_s390_dbf_uv
, &debug_sprintf_view
))
497 kvm_s390_cpu_feat_init();
499 /* Register floating interrupt controller interface. */
500 rc
= kvm_register_device_ops(&kvm_flic_ops
, KVM_DEV_TYPE_FLIC
);
502 pr_err("A FLIC registration call failed with rc=%d\n", rc
);
506 rc
= kvm_s390_gib_init(GAL_ISC
);
517 void kvm_arch_exit(void)
519 kvm_s390_gib_destroy();
520 debug_unregister(kvm_s390_dbf
);
521 debug_unregister(kvm_s390_dbf_uv
);
524 /* Section: device related */
525 long kvm_arch_dev_ioctl(struct file
*filp
,
526 unsigned int ioctl
, unsigned long arg
)
528 if (ioctl
== KVM_S390_ENABLE_SIE
)
529 return s390_enable_sie();
533 int kvm_vm_ioctl_check_extension(struct kvm
*kvm
, long ext
)
538 case KVM_CAP_S390_PSW
:
539 case KVM_CAP_S390_GMAP
:
540 case KVM_CAP_SYNC_MMU
:
541 #ifdef CONFIG_KVM_S390_UCONTROL
542 case KVM_CAP_S390_UCONTROL
:
544 case KVM_CAP_ASYNC_PF
:
545 case KVM_CAP_SYNC_REGS
:
546 case KVM_CAP_ONE_REG
:
547 case KVM_CAP_ENABLE_CAP
:
548 case KVM_CAP_S390_CSS_SUPPORT
:
549 case KVM_CAP_IOEVENTFD
:
550 case KVM_CAP_DEVICE_CTRL
:
551 case KVM_CAP_S390_IRQCHIP
:
552 case KVM_CAP_VM_ATTRIBUTES
:
553 case KVM_CAP_MP_STATE
:
554 case KVM_CAP_IMMEDIATE_EXIT
:
555 case KVM_CAP_S390_INJECT_IRQ
:
556 case KVM_CAP_S390_USER_SIGP
:
557 case KVM_CAP_S390_USER_STSI
:
558 case KVM_CAP_S390_SKEYS
:
559 case KVM_CAP_S390_IRQ_STATE
:
560 case KVM_CAP_S390_USER_INSTR0
:
561 case KVM_CAP_S390_CMMA_MIGRATION
:
562 case KVM_CAP_S390_AIS
:
563 case KVM_CAP_S390_AIS_MIGRATION
:
564 case KVM_CAP_S390_VCPU_RESETS
:
565 case KVM_CAP_SET_GUEST_DEBUG
:
566 case KVM_CAP_S390_DIAG318
:
567 case KVM_CAP_S390_MEM_OP_EXTENSION
:
570 case KVM_CAP_SET_GUEST_DEBUG2
:
571 r
= KVM_GUESTDBG_VALID_MASK
;
573 case KVM_CAP_S390_HPAGE_1M
:
575 if (hpage
&& !kvm_is_ucontrol(kvm
))
578 case KVM_CAP_S390_MEM_OP
:
581 case KVM_CAP_NR_VCPUS
:
582 case KVM_CAP_MAX_VCPUS
:
583 case KVM_CAP_MAX_VCPU_ID
:
584 r
= KVM_S390_BSCA_CPU_SLOTS
;
585 if (!kvm_s390_use_sca_entries())
587 else if (sclp
.has_esca
&& sclp
.has_64bscao
)
588 r
= KVM_S390_ESCA_CPU_SLOTS
;
590 case KVM_CAP_S390_COW
:
591 r
= MACHINE_HAS_ESOP
;
593 case KVM_CAP_S390_VECTOR_REGISTERS
:
596 case KVM_CAP_S390_RI
:
597 r
= test_facility(64);
599 case KVM_CAP_S390_GS
:
600 r
= test_facility(133);
602 case KVM_CAP_S390_BPB
:
603 r
= test_facility(82);
605 case KVM_CAP_S390_PROTECTED
:
606 r
= is_prot_virt_host();
614 void kvm_arch_sync_dirty_log(struct kvm
*kvm
, struct kvm_memory_slot
*memslot
)
617 gfn_t cur_gfn
, last_gfn
;
618 unsigned long gaddr
, vmaddr
;
619 struct gmap
*gmap
= kvm
->arch
.gmap
;
620 DECLARE_BITMAP(bitmap
, _PAGE_ENTRIES
);
622 /* Loop over all guest segments */
623 cur_gfn
= memslot
->base_gfn
;
624 last_gfn
= memslot
->base_gfn
+ memslot
->npages
;
625 for (; cur_gfn
<= last_gfn
; cur_gfn
+= _PAGE_ENTRIES
) {
626 gaddr
= gfn_to_gpa(cur_gfn
);
627 vmaddr
= gfn_to_hva_memslot(memslot
, cur_gfn
);
628 if (kvm_is_error_hva(vmaddr
))
631 bitmap_zero(bitmap
, _PAGE_ENTRIES
);
632 gmap_sync_dirty_log_pmd(gmap
, bitmap
, gaddr
, vmaddr
);
633 for (i
= 0; i
< _PAGE_ENTRIES
; i
++) {
634 if (test_bit(i
, bitmap
))
635 mark_page_dirty(kvm
, cur_gfn
+ i
);
638 if (fatal_signal_pending(current
))
644 /* Section: vm related */
645 static void sca_del_vcpu(struct kvm_vcpu
*vcpu
);
648 * Get (and clear) the dirty memory log for a memory slot.
650 int kvm_vm_ioctl_get_dirty_log(struct kvm
*kvm
,
651 struct kvm_dirty_log
*log
)
655 struct kvm_memory_slot
*memslot
;
658 if (kvm_is_ucontrol(kvm
))
661 mutex_lock(&kvm
->slots_lock
);
664 if (log
->slot
>= KVM_USER_MEM_SLOTS
)
667 r
= kvm_get_dirty_log(kvm
, log
, &is_dirty
, &memslot
);
671 /* Clear the dirty log */
673 n
= kvm_dirty_bitmap_bytes(memslot
);
674 memset(memslot
->dirty_bitmap
, 0, n
);
678 mutex_unlock(&kvm
->slots_lock
);
682 static void icpt_operexc_on_all_vcpus(struct kvm
*kvm
)
685 struct kvm_vcpu
*vcpu
;
687 kvm_for_each_vcpu(i
, vcpu
, kvm
) {
688 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC
, vcpu
);
692 int kvm_vm_ioctl_enable_cap(struct kvm
*kvm
, struct kvm_enable_cap
*cap
)
700 case KVM_CAP_S390_IRQCHIP
:
701 VM_EVENT(kvm
, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
702 kvm
->arch
.use_irqchip
= 1;
705 case KVM_CAP_S390_USER_SIGP
:
706 VM_EVENT(kvm
, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
707 kvm
->arch
.user_sigp
= 1;
710 case KVM_CAP_S390_VECTOR_REGISTERS
:
711 mutex_lock(&kvm
->lock
);
712 if (kvm
->created_vcpus
) {
714 } else if (MACHINE_HAS_VX
) {
715 set_kvm_facility(kvm
->arch
.model
.fac_mask
, 129);
716 set_kvm_facility(kvm
->arch
.model
.fac_list
, 129);
717 if (test_facility(134)) {
718 set_kvm_facility(kvm
->arch
.model
.fac_mask
, 134);
719 set_kvm_facility(kvm
->arch
.model
.fac_list
, 134);
721 if (test_facility(135)) {
722 set_kvm_facility(kvm
->arch
.model
.fac_mask
, 135);
723 set_kvm_facility(kvm
->arch
.model
.fac_list
, 135);
725 if (test_facility(148)) {
726 set_kvm_facility(kvm
->arch
.model
.fac_mask
, 148);
727 set_kvm_facility(kvm
->arch
.model
.fac_list
, 148);
729 if (test_facility(152)) {
730 set_kvm_facility(kvm
->arch
.model
.fac_mask
, 152);
731 set_kvm_facility(kvm
->arch
.model
.fac_list
, 152);
733 if (test_facility(192)) {
734 set_kvm_facility(kvm
->arch
.model
.fac_mask
, 192);
735 set_kvm_facility(kvm
->arch
.model
.fac_list
, 192);
740 mutex_unlock(&kvm
->lock
);
741 VM_EVENT(kvm
, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
742 r
? "(not available)" : "(success)");
744 case KVM_CAP_S390_RI
:
746 mutex_lock(&kvm
->lock
);
747 if (kvm
->created_vcpus
) {
749 } else if (test_facility(64)) {
750 set_kvm_facility(kvm
->arch
.model
.fac_mask
, 64);
751 set_kvm_facility(kvm
->arch
.model
.fac_list
, 64);
754 mutex_unlock(&kvm
->lock
);
755 VM_EVENT(kvm
, 3, "ENABLE: CAP_S390_RI %s",
756 r
? "(not available)" : "(success)");
758 case KVM_CAP_S390_AIS
:
759 mutex_lock(&kvm
->lock
);
760 if (kvm
->created_vcpus
) {
763 set_kvm_facility(kvm
->arch
.model
.fac_mask
, 72);
764 set_kvm_facility(kvm
->arch
.model
.fac_list
, 72);
767 mutex_unlock(&kvm
->lock
);
768 VM_EVENT(kvm
, 3, "ENABLE: AIS %s",
769 r
? "(not available)" : "(success)");
771 case KVM_CAP_S390_GS
:
773 mutex_lock(&kvm
->lock
);
774 if (kvm
->created_vcpus
) {
776 } else if (test_facility(133)) {
777 set_kvm_facility(kvm
->arch
.model
.fac_mask
, 133);
778 set_kvm_facility(kvm
->arch
.model
.fac_list
, 133);
781 mutex_unlock(&kvm
->lock
);
782 VM_EVENT(kvm
, 3, "ENABLE: CAP_S390_GS %s",
783 r
? "(not available)" : "(success)");
785 case KVM_CAP_S390_HPAGE_1M
:
786 mutex_lock(&kvm
->lock
);
787 if (kvm
->created_vcpus
)
789 else if (!hpage
|| kvm
->arch
.use_cmma
|| kvm_is_ucontrol(kvm
))
793 mmap_write_lock(kvm
->mm
);
794 kvm
->mm
->context
.allow_gmap_hpage_1m
= 1;
795 mmap_write_unlock(kvm
->mm
);
797 * We might have to create fake 4k page
798 * tables. To avoid that the hardware works on
799 * stale PGSTEs, we emulate these instructions.
801 kvm
->arch
.use_skf
= 0;
802 kvm
->arch
.use_pfmfi
= 0;
804 mutex_unlock(&kvm
->lock
);
805 VM_EVENT(kvm
, 3, "ENABLE: CAP_S390_HPAGE %s",
806 r
? "(not available)" : "(success)");
808 case KVM_CAP_S390_USER_STSI
:
809 VM_EVENT(kvm
, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
810 kvm
->arch
.user_stsi
= 1;
813 case KVM_CAP_S390_USER_INSTR0
:
814 VM_EVENT(kvm
, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
815 kvm
->arch
.user_instr0
= 1;
816 icpt_operexc_on_all_vcpus(kvm
);
826 static int kvm_s390_get_mem_control(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
830 switch (attr
->attr
) {
831 case KVM_S390_VM_MEM_LIMIT_SIZE
:
833 VM_EVENT(kvm
, 3, "QUERY: max guest memory: %lu bytes",
834 kvm
->arch
.mem_limit
);
835 if (put_user(kvm
->arch
.mem_limit
, (u64 __user
*)attr
->addr
))
845 static int kvm_s390_set_mem_control(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
849 switch (attr
->attr
) {
850 case KVM_S390_VM_MEM_ENABLE_CMMA
:
855 VM_EVENT(kvm
, 3, "%s", "ENABLE: CMMA support");
856 mutex_lock(&kvm
->lock
);
857 if (kvm
->created_vcpus
)
859 else if (kvm
->mm
->context
.allow_gmap_hpage_1m
)
862 kvm
->arch
.use_cmma
= 1;
863 /* Not compatible with cmma. */
864 kvm
->arch
.use_pfmfi
= 0;
867 mutex_unlock(&kvm
->lock
);
869 case KVM_S390_VM_MEM_CLR_CMMA
:
874 if (!kvm
->arch
.use_cmma
)
877 VM_EVENT(kvm
, 3, "%s", "RESET: CMMA states");
878 mutex_lock(&kvm
->lock
);
879 idx
= srcu_read_lock(&kvm
->srcu
);
880 s390_reset_cmma(kvm
->arch
.gmap
->mm
);
881 srcu_read_unlock(&kvm
->srcu
, idx
);
882 mutex_unlock(&kvm
->lock
);
885 case KVM_S390_VM_MEM_LIMIT_SIZE
: {
886 unsigned long new_limit
;
888 if (kvm_is_ucontrol(kvm
))
891 if (get_user(new_limit
, (u64 __user
*)attr
->addr
))
894 if (kvm
->arch
.mem_limit
!= KVM_S390_NO_MEM_LIMIT
&&
895 new_limit
> kvm
->arch
.mem_limit
)
901 /* gmap_create takes last usable address */
902 if (new_limit
!= KVM_S390_NO_MEM_LIMIT
)
906 mutex_lock(&kvm
->lock
);
907 if (!kvm
->created_vcpus
) {
908 /* gmap_create will round the limit up */
909 struct gmap
*new = gmap_create(current
->mm
, new_limit
);
914 gmap_remove(kvm
->arch
.gmap
);
916 kvm
->arch
.gmap
= new;
920 mutex_unlock(&kvm
->lock
);
921 VM_EVENT(kvm
, 3, "SET: max guest address: %lu", new_limit
);
922 VM_EVENT(kvm
, 3, "New guest asce: 0x%pK",
923 (void *) kvm
->arch
.gmap
->asce
);
933 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu
*vcpu
);
935 void kvm_s390_vcpu_crypto_reset_all(struct kvm
*kvm
)
937 struct kvm_vcpu
*vcpu
;
940 kvm_s390_vcpu_block_all(kvm
);
942 kvm_for_each_vcpu(i
, vcpu
, kvm
) {
943 kvm_s390_vcpu_crypto_setup(vcpu
);
944 /* recreate the shadow crycb by leaving the VSIE handler */
945 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART
, vcpu
);
948 kvm_s390_vcpu_unblock_all(kvm
);
951 static int kvm_s390_vm_set_crypto(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
953 mutex_lock(&kvm
->lock
);
954 switch (attr
->attr
) {
955 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW
:
956 if (!test_kvm_facility(kvm
, 76)) {
957 mutex_unlock(&kvm
->lock
);
961 kvm
->arch
.crypto
.crycb
->aes_wrapping_key_mask
,
962 sizeof(kvm
->arch
.crypto
.crycb
->aes_wrapping_key_mask
));
963 kvm
->arch
.crypto
.aes_kw
= 1;
964 VM_EVENT(kvm
, 3, "%s", "ENABLE: AES keywrapping support");
966 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW
:
967 if (!test_kvm_facility(kvm
, 76)) {
968 mutex_unlock(&kvm
->lock
);
972 kvm
->arch
.crypto
.crycb
->dea_wrapping_key_mask
,
973 sizeof(kvm
->arch
.crypto
.crycb
->dea_wrapping_key_mask
));
974 kvm
->arch
.crypto
.dea_kw
= 1;
975 VM_EVENT(kvm
, 3, "%s", "ENABLE: DEA keywrapping support");
977 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW
:
978 if (!test_kvm_facility(kvm
, 76)) {
979 mutex_unlock(&kvm
->lock
);
982 kvm
->arch
.crypto
.aes_kw
= 0;
983 memset(kvm
->arch
.crypto
.crycb
->aes_wrapping_key_mask
, 0,
984 sizeof(kvm
->arch
.crypto
.crycb
->aes_wrapping_key_mask
));
985 VM_EVENT(kvm
, 3, "%s", "DISABLE: AES keywrapping support");
987 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW
:
988 if (!test_kvm_facility(kvm
, 76)) {
989 mutex_unlock(&kvm
->lock
);
992 kvm
->arch
.crypto
.dea_kw
= 0;
993 memset(kvm
->arch
.crypto
.crycb
->dea_wrapping_key_mask
, 0,
994 sizeof(kvm
->arch
.crypto
.crycb
->dea_wrapping_key_mask
));
995 VM_EVENT(kvm
, 3, "%s", "DISABLE: DEA keywrapping support");
997 case KVM_S390_VM_CRYPTO_ENABLE_APIE
:
998 if (!ap_instructions_available()) {
999 mutex_unlock(&kvm
->lock
);
1002 kvm
->arch
.crypto
.apie
= 1;
1004 case KVM_S390_VM_CRYPTO_DISABLE_APIE
:
1005 if (!ap_instructions_available()) {
1006 mutex_unlock(&kvm
->lock
);
1009 kvm
->arch
.crypto
.apie
= 0;
1012 mutex_unlock(&kvm
->lock
);
1016 kvm_s390_vcpu_crypto_reset_all(kvm
);
1017 mutex_unlock(&kvm
->lock
);
1021 static void kvm_s390_sync_request_broadcast(struct kvm
*kvm
, int req
)
1024 struct kvm_vcpu
*vcpu
;
1026 kvm_for_each_vcpu(cx
, vcpu
, kvm
)
1027 kvm_s390_sync_request(req
, vcpu
);
1031 * Must be called with kvm->srcu held to avoid races on memslots, and with
1032 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1034 static int kvm_s390_vm_start_migration(struct kvm
*kvm
)
1036 struct kvm_memory_slot
*ms
;
1037 struct kvm_memslots
*slots
;
1038 unsigned long ram_pages
= 0;
1041 /* migration mode already enabled */
1042 if (kvm
->arch
.migration_mode
)
1044 slots
= kvm_memslots(kvm
);
1045 if (!slots
|| !slots
->used_slots
)
1048 if (!kvm
->arch
.use_cmma
) {
1049 kvm
->arch
.migration_mode
= 1;
1052 /* mark all the pages in active slots as dirty */
1053 for (slotnr
= 0; slotnr
< slots
->used_slots
; slotnr
++) {
1054 ms
= slots
->memslots
+ slotnr
;
1055 if (!ms
->dirty_bitmap
)
1058 * The second half of the bitmap is only used on x86,
1059 * and would be wasted otherwise, so we put it to good
1060 * use here to keep track of the state of the storage
1063 memset(kvm_second_dirty_bitmap(ms
), 0xff, kvm_dirty_bitmap_bytes(ms
));
1064 ram_pages
+= ms
->npages
;
1066 atomic64_set(&kvm
->arch
.cmma_dirty_pages
, ram_pages
);
1067 kvm
->arch
.migration_mode
= 1;
1068 kvm_s390_sync_request_broadcast(kvm
, KVM_REQ_START_MIGRATION
);
1073 * Must be called with kvm->slots_lock to avoid races with ourselves and
1074 * kvm_s390_vm_start_migration.
1076 static int kvm_s390_vm_stop_migration(struct kvm
*kvm
)
1078 /* migration mode already disabled */
1079 if (!kvm
->arch
.migration_mode
)
1081 kvm
->arch
.migration_mode
= 0;
1082 if (kvm
->arch
.use_cmma
)
1083 kvm_s390_sync_request_broadcast(kvm
, KVM_REQ_STOP_MIGRATION
);
1087 static int kvm_s390_vm_set_migration(struct kvm
*kvm
,
1088 struct kvm_device_attr
*attr
)
1092 mutex_lock(&kvm
->slots_lock
);
1093 switch (attr
->attr
) {
1094 case KVM_S390_VM_MIGRATION_START
:
1095 res
= kvm_s390_vm_start_migration(kvm
);
1097 case KVM_S390_VM_MIGRATION_STOP
:
1098 res
= kvm_s390_vm_stop_migration(kvm
);
1103 mutex_unlock(&kvm
->slots_lock
);
1108 static int kvm_s390_vm_get_migration(struct kvm
*kvm
,
1109 struct kvm_device_attr
*attr
)
1111 u64 mig
= kvm
->arch
.migration_mode
;
1113 if (attr
->attr
!= KVM_S390_VM_MIGRATION_STATUS
)
1116 if (copy_to_user((void __user
*)attr
->addr
, &mig
, sizeof(mig
)))
1121 static int kvm_s390_set_tod_ext(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
1123 struct kvm_s390_vm_tod_clock gtod
;
1125 if (copy_from_user(>od
, (void __user
*)attr
->addr
, sizeof(gtod
)))
1128 if (!test_kvm_facility(kvm
, 139) && gtod
.epoch_idx
)
1130 kvm_s390_set_tod_clock(kvm
, >od
);
1132 VM_EVENT(kvm
, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1133 gtod
.epoch_idx
, gtod
.tod
);
1138 static int kvm_s390_set_tod_high(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
1142 if (copy_from_user(>od_high
, (void __user
*)attr
->addr
,
1148 VM_EVENT(kvm
, 3, "SET: TOD extension: 0x%x", gtod_high
);
1153 static int kvm_s390_set_tod_low(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
1155 struct kvm_s390_vm_tod_clock gtod
= { 0 };
1157 if (copy_from_user(>od
.tod
, (void __user
*)attr
->addr
,
1161 kvm_s390_set_tod_clock(kvm
, >od
);
1162 VM_EVENT(kvm
, 3, "SET: TOD base: 0x%llx", gtod
.tod
);
1166 static int kvm_s390_set_tod(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
1173 switch (attr
->attr
) {
1174 case KVM_S390_VM_TOD_EXT
:
1175 ret
= kvm_s390_set_tod_ext(kvm
, attr
);
1177 case KVM_S390_VM_TOD_HIGH
:
1178 ret
= kvm_s390_set_tod_high(kvm
, attr
);
1180 case KVM_S390_VM_TOD_LOW
:
1181 ret
= kvm_s390_set_tod_low(kvm
, attr
);
1190 static void kvm_s390_get_tod_clock(struct kvm
*kvm
,
1191 struct kvm_s390_vm_tod_clock
*gtod
)
1193 union tod_clock clk
;
1197 store_tod_clock_ext(&clk
);
1199 gtod
->tod
= clk
.tod
+ kvm
->arch
.epoch
;
1200 gtod
->epoch_idx
= 0;
1201 if (test_kvm_facility(kvm
, 139)) {
1202 gtod
->epoch_idx
= clk
.ei
+ kvm
->arch
.epdx
;
1203 if (gtod
->tod
< clk
.tod
)
1204 gtod
->epoch_idx
+= 1;
1210 static int kvm_s390_get_tod_ext(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
1212 struct kvm_s390_vm_tod_clock gtod
;
1214 memset(>od
, 0, sizeof(gtod
));
1215 kvm_s390_get_tod_clock(kvm
, >od
);
1216 if (copy_to_user((void __user
*)attr
->addr
, >od
, sizeof(gtod
)))
1219 VM_EVENT(kvm
, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1220 gtod
.epoch_idx
, gtod
.tod
);
1224 static int kvm_s390_get_tod_high(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
1228 if (copy_to_user((void __user
*)attr
->addr
, >od_high
,
1231 VM_EVENT(kvm
, 3, "QUERY: TOD extension: 0x%x", gtod_high
);
1236 static int kvm_s390_get_tod_low(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
1240 gtod
= kvm_s390_get_tod_clock_fast(kvm
);
1241 if (copy_to_user((void __user
*)attr
->addr
, >od
, sizeof(gtod
)))
1243 VM_EVENT(kvm
, 3, "QUERY: TOD base: 0x%llx", gtod
);
1248 static int kvm_s390_get_tod(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
1255 switch (attr
->attr
) {
1256 case KVM_S390_VM_TOD_EXT
:
1257 ret
= kvm_s390_get_tod_ext(kvm
, attr
);
1259 case KVM_S390_VM_TOD_HIGH
:
1260 ret
= kvm_s390_get_tod_high(kvm
, attr
);
1262 case KVM_S390_VM_TOD_LOW
:
1263 ret
= kvm_s390_get_tod_low(kvm
, attr
);
1272 static int kvm_s390_set_processor(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
1274 struct kvm_s390_vm_cpu_processor
*proc
;
1275 u16 lowest_ibc
, unblocked_ibc
;
1278 mutex_lock(&kvm
->lock
);
1279 if (kvm
->created_vcpus
) {
1283 proc
= kzalloc(sizeof(*proc
), GFP_KERNEL_ACCOUNT
);
1288 if (!copy_from_user(proc
, (void __user
*)attr
->addr
,
1290 kvm
->arch
.model
.cpuid
= proc
->cpuid
;
1291 lowest_ibc
= sclp
.ibc
>> 16 & 0xfff;
1292 unblocked_ibc
= sclp
.ibc
& 0xfff;
1293 if (lowest_ibc
&& proc
->ibc
) {
1294 if (proc
->ibc
> unblocked_ibc
)
1295 kvm
->arch
.model
.ibc
= unblocked_ibc
;
1296 else if (proc
->ibc
< lowest_ibc
)
1297 kvm
->arch
.model
.ibc
= lowest_ibc
;
1299 kvm
->arch
.model
.ibc
= proc
->ibc
;
1301 memcpy(kvm
->arch
.model
.fac_list
, proc
->fac_list
,
1302 S390_ARCH_FAC_LIST_SIZE_BYTE
);
1303 VM_EVENT(kvm
, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1304 kvm
->arch
.model
.ibc
,
1305 kvm
->arch
.model
.cpuid
);
1306 VM_EVENT(kvm
, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1307 kvm
->arch
.model
.fac_list
[0],
1308 kvm
->arch
.model
.fac_list
[1],
1309 kvm
->arch
.model
.fac_list
[2]);
1314 mutex_unlock(&kvm
->lock
);
1318 static int kvm_s390_set_processor_feat(struct kvm
*kvm
,
1319 struct kvm_device_attr
*attr
)
1321 struct kvm_s390_vm_cpu_feat data
;
1323 if (copy_from_user(&data
, (void __user
*)attr
->addr
, sizeof(data
)))
1325 if (!bitmap_subset((unsigned long *) data
.feat
,
1326 kvm_s390_available_cpu_feat
,
1327 KVM_S390_VM_CPU_FEAT_NR_BITS
))
1330 mutex_lock(&kvm
->lock
);
1331 if (kvm
->created_vcpus
) {
1332 mutex_unlock(&kvm
->lock
);
1335 bitmap_copy(kvm
->arch
.cpu_feat
, (unsigned long *) data
.feat
,
1336 KVM_S390_VM_CPU_FEAT_NR_BITS
);
1337 mutex_unlock(&kvm
->lock
);
1338 VM_EVENT(kvm
, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1345 static int kvm_s390_set_processor_subfunc(struct kvm
*kvm
,
1346 struct kvm_device_attr
*attr
)
1348 mutex_lock(&kvm
->lock
);
1349 if (kvm
->created_vcpus
) {
1350 mutex_unlock(&kvm
->lock
);
1354 if (copy_from_user(&kvm
->arch
.model
.subfuncs
, (void __user
*)attr
->addr
,
1355 sizeof(struct kvm_s390_vm_cpu_subfunc
))) {
1356 mutex_unlock(&kvm
->lock
);
1359 mutex_unlock(&kvm
->lock
);
1361 VM_EVENT(kvm
, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1362 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.plo
)[0],
1363 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.plo
)[1],
1364 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.plo
)[2],
1365 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.plo
)[3]);
1366 VM_EVENT(kvm
, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1367 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.ptff
)[0],
1368 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.ptff
)[1]);
1369 VM_EVENT(kvm
, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1370 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kmac
)[0],
1371 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kmac
)[1]);
1372 VM_EVENT(kvm
, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1373 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kmc
)[0],
1374 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kmc
)[1]);
1375 VM_EVENT(kvm
, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
1376 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.km
)[0],
1377 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.km
)[1]);
1378 VM_EVENT(kvm
, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1379 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kimd
)[0],
1380 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kimd
)[1]);
1381 VM_EVENT(kvm
, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1382 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.klmd
)[0],
1383 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.klmd
)[1]);
1384 VM_EVENT(kvm
, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1385 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.pckmo
)[0],
1386 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.pckmo
)[1]);
1387 VM_EVENT(kvm
, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1388 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kmctr
)[0],
1389 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kmctr
)[1]);
1390 VM_EVENT(kvm
, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1391 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kmf
)[0],
1392 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kmf
)[1]);
1393 VM_EVENT(kvm
, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1394 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kmo
)[0],
1395 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kmo
)[1]);
1396 VM_EVENT(kvm
, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1397 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.pcc
)[0],
1398 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.pcc
)[1]);
1399 VM_EVENT(kvm
, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1400 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.ppno
)[0],
1401 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.ppno
)[1]);
1402 VM_EVENT(kvm
, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1403 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kma
)[0],
1404 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kma
)[1]);
1405 VM_EVENT(kvm
, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1406 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kdsa
)[0],
1407 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kdsa
)[1]);
1408 VM_EVENT(kvm
, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1409 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.sortl
)[0],
1410 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.sortl
)[1],
1411 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.sortl
)[2],
1412 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.sortl
)[3]);
1413 VM_EVENT(kvm
, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1414 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.dfltcc
)[0],
1415 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.dfltcc
)[1],
1416 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.dfltcc
)[2],
1417 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.dfltcc
)[3]);
1422 static int kvm_s390_set_cpu_model(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
1426 switch (attr
->attr
) {
1427 case KVM_S390_VM_CPU_PROCESSOR
:
1428 ret
= kvm_s390_set_processor(kvm
, attr
);
1430 case KVM_S390_VM_CPU_PROCESSOR_FEAT
:
1431 ret
= kvm_s390_set_processor_feat(kvm
, attr
);
1433 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC
:
1434 ret
= kvm_s390_set_processor_subfunc(kvm
, attr
);
1440 static int kvm_s390_get_processor(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
1442 struct kvm_s390_vm_cpu_processor
*proc
;
1445 proc
= kzalloc(sizeof(*proc
), GFP_KERNEL_ACCOUNT
);
1450 proc
->cpuid
= kvm
->arch
.model
.cpuid
;
1451 proc
->ibc
= kvm
->arch
.model
.ibc
;
1452 memcpy(&proc
->fac_list
, kvm
->arch
.model
.fac_list
,
1453 S390_ARCH_FAC_LIST_SIZE_BYTE
);
1454 VM_EVENT(kvm
, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1455 kvm
->arch
.model
.ibc
,
1456 kvm
->arch
.model
.cpuid
);
1457 VM_EVENT(kvm
, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1458 kvm
->arch
.model
.fac_list
[0],
1459 kvm
->arch
.model
.fac_list
[1],
1460 kvm
->arch
.model
.fac_list
[2]);
1461 if (copy_to_user((void __user
*)attr
->addr
, proc
, sizeof(*proc
)))
1468 static int kvm_s390_get_machine(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
1470 struct kvm_s390_vm_cpu_machine
*mach
;
1473 mach
= kzalloc(sizeof(*mach
), GFP_KERNEL_ACCOUNT
);
1478 get_cpu_id((struct cpuid
*) &mach
->cpuid
);
1479 mach
->ibc
= sclp
.ibc
;
1480 memcpy(&mach
->fac_mask
, kvm
->arch
.model
.fac_mask
,
1481 S390_ARCH_FAC_LIST_SIZE_BYTE
);
1482 memcpy((unsigned long *)&mach
->fac_list
, stfle_fac_list
,
1483 sizeof(stfle_fac_list
));
1484 VM_EVENT(kvm
, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1485 kvm
->arch
.model
.ibc
,
1486 kvm
->arch
.model
.cpuid
);
1487 VM_EVENT(kvm
, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1491 VM_EVENT(kvm
, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1495 if (copy_to_user((void __user
*)attr
->addr
, mach
, sizeof(*mach
)))
1502 static int kvm_s390_get_processor_feat(struct kvm
*kvm
,
1503 struct kvm_device_attr
*attr
)
1505 struct kvm_s390_vm_cpu_feat data
;
1507 bitmap_copy((unsigned long *) data
.feat
, kvm
->arch
.cpu_feat
,
1508 KVM_S390_VM_CPU_FEAT_NR_BITS
);
1509 if (copy_to_user((void __user
*)attr
->addr
, &data
, sizeof(data
)))
1511 VM_EVENT(kvm
, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1518 static int kvm_s390_get_machine_feat(struct kvm
*kvm
,
1519 struct kvm_device_attr
*attr
)
1521 struct kvm_s390_vm_cpu_feat data
;
1523 bitmap_copy((unsigned long *) data
.feat
,
1524 kvm_s390_available_cpu_feat
,
1525 KVM_S390_VM_CPU_FEAT_NR_BITS
);
1526 if (copy_to_user((void __user
*)attr
->addr
, &data
, sizeof(data
)))
1528 VM_EVENT(kvm
, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1535 static int kvm_s390_get_processor_subfunc(struct kvm
*kvm
,
1536 struct kvm_device_attr
*attr
)
1538 if (copy_to_user((void __user
*)attr
->addr
, &kvm
->arch
.model
.subfuncs
,
1539 sizeof(struct kvm_s390_vm_cpu_subfunc
)))
1542 VM_EVENT(kvm
, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1543 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.plo
)[0],
1544 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.plo
)[1],
1545 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.plo
)[2],
1546 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.plo
)[3]);
1547 VM_EVENT(kvm
, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1548 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.ptff
)[0],
1549 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.ptff
)[1]);
1550 VM_EVENT(kvm
, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1551 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kmac
)[0],
1552 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kmac
)[1]);
1553 VM_EVENT(kvm
, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1554 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kmc
)[0],
1555 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kmc
)[1]);
1556 VM_EVENT(kvm
, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
1557 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.km
)[0],
1558 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.km
)[1]);
1559 VM_EVENT(kvm
, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1560 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kimd
)[0],
1561 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kimd
)[1]);
1562 VM_EVENT(kvm
, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1563 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.klmd
)[0],
1564 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.klmd
)[1]);
1565 VM_EVENT(kvm
, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1566 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.pckmo
)[0],
1567 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.pckmo
)[1]);
1568 VM_EVENT(kvm
, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1569 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kmctr
)[0],
1570 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kmctr
)[1]);
1571 VM_EVENT(kvm
, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1572 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kmf
)[0],
1573 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kmf
)[1]);
1574 VM_EVENT(kvm
, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1575 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kmo
)[0],
1576 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kmo
)[1]);
1577 VM_EVENT(kvm
, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1578 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.pcc
)[0],
1579 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.pcc
)[1]);
1580 VM_EVENT(kvm
, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1581 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.ppno
)[0],
1582 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.ppno
)[1]);
1583 VM_EVENT(kvm
, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1584 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kma
)[0],
1585 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kma
)[1]);
1586 VM_EVENT(kvm
, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1587 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kdsa
)[0],
1588 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.kdsa
)[1]);
1589 VM_EVENT(kvm
, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1590 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.sortl
)[0],
1591 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.sortl
)[1],
1592 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.sortl
)[2],
1593 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.sortl
)[3]);
1594 VM_EVENT(kvm
, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1595 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.dfltcc
)[0],
1596 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.dfltcc
)[1],
1597 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.dfltcc
)[2],
1598 ((unsigned long *) &kvm
->arch
.model
.subfuncs
.dfltcc
)[3]);
1603 static int kvm_s390_get_machine_subfunc(struct kvm
*kvm
,
1604 struct kvm_device_attr
*attr
)
1606 if (copy_to_user((void __user
*)attr
->addr
, &kvm_s390_available_subfunc
,
1607 sizeof(struct kvm_s390_vm_cpu_subfunc
)))
1610 VM_EVENT(kvm
, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1611 ((unsigned long *) &kvm_s390_available_subfunc
.plo
)[0],
1612 ((unsigned long *) &kvm_s390_available_subfunc
.plo
)[1],
1613 ((unsigned long *) &kvm_s390_available_subfunc
.plo
)[2],
1614 ((unsigned long *) &kvm_s390_available_subfunc
.plo
)[3]);
1615 VM_EVENT(kvm
, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
1616 ((unsigned long *) &kvm_s390_available_subfunc
.ptff
)[0],
1617 ((unsigned long *) &kvm_s390_available_subfunc
.ptff
)[1]);
1618 VM_EVENT(kvm
, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
1619 ((unsigned long *) &kvm_s390_available_subfunc
.kmac
)[0],
1620 ((unsigned long *) &kvm_s390_available_subfunc
.kmac
)[1]);
1621 VM_EVENT(kvm
, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
1622 ((unsigned long *) &kvm_s390_available_subfunc
.kmc
)[0],
1623 ((unsigned long *) &kvm_s390_available_subfunc
.kmc
)[1]);
1624 VM_EVENT(kvm
, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
1625 ((unsigned long *) &kvm_s390_available_subfunc
.km
)[0],
1626 ((unsigned long *) &kvm_s390_available_subfunc
.km
)[1]);
1627 VM_EVENT(kvm
, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
1628 ((unsigned long *) &kvm_s390_available_subfunc
.kimd
)[0],
1629 ((unsigned long *) &kvm_s390_available_subfunc
.kimd
)[1]);
1630 VM_EVENT(kvm
, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
1631 ((unsigned long *) &kvm_s390_available_subfunc
.klmd
)[0],
1632 ((unsigned long *) &kvm_s390_available_subfunc
.klmd
)[1]);
1633 VM_EVENT(kvm
, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
1634 ((unsigned long *) &kvm_s390_available_subfunc
.pckmo
)[0],
1635 ((unsigned long *) &kvm_s390_available_subfunc
.pckmo
)[1]);
1636 VM_EVENT(kvm
, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
1637 ((unsigned long *) &kvm_s390_available_subfunc
.kmctr
)[0],
1638 ((unsigned long *) &kvm_s390_available_subfunc
.kmctr
)[1]);
1639 VM_EVENT(kvm
, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
1640 ((unsigned long *) &kvm_s390_available_subfunc
.kmf
)[0],
1641 ((unsigned long *) &kvm_s390_available_subfunc
.kmf
)[1]);
1642 VM_EVENT(kvm
, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
1643 ((unsigned long *) &kvm_s390_available_subfunc
.kmo
)[0],
1644 ((unsigned long *) &kvm_s390_available_subfunc
.kmo
)[1]);
1645 VM_EVENT(kvm
, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
1646 ((unsigned long *) &kvm_s390_available_subfunc
.pcc
)[0],
1647 ((unsigned long *) &kvm_s390_available_subfunc
.pcc
)[1]);
1648 VM_EVENT(kvm
, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
1649 ((unsigned long *) &kvm_s390_available_subfunc
.ppno
)[0],
1650 ((unsigned long *) &kvm_s390_available_subfunc
.ppno
)[1]);
1651 VM_EVENT(kvm
, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
1652 ((unsigned long *) &kvm_s390_available_subfunc
.kma
)[0],
1653 ((unsigned long *) &kvm_s390_available_subfunc
.kma
)[1]);
1654 VM_EVENT(kvm
, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
1655 ((unsigned long *) &kvm_s390_available_subfunc
.kdsa
)[0],
1656 ((unsigned long *) &kvm_s390_available_subfunc
.kdsa
)[1]);
1657 VM_EVENT(kvm
, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1658 ((unsigned long *) &kvm_s390_available_subfunc
.sortl
)[0],
1659 ((unsigned long *) &kvm_s390_available_subfunc
.sortl
)[1],
1660 ((unsigned long *) &kvm_s390_available_subfunc
.sortl
)[2],
1661 ((unsigned long *) &kvm_s390_available_subfunc
.sortl
)[3]);
1662 VM_EVENT(kvm
, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1663 ((unsigned long *) &kvm_s390_available_subfunc
.dfltcc
)[0],
1664 ((unsigned long *) &kvm_s390_available_subfunc
.dfltcc
)[1],
1665 ((unsigned long *) &kvm_s390_available_subfunc
.dfltcc
)[2],
1666 ((unsigned long *) &kvm_s390_available_subfunc
.dfltcc
)[3]);
1671 static int kvm_s390_get_cpu_model(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
1675 switch (attr
->attr
) {
1676 case KVM_S390_VM_CPU_PROCESSOR
:
1677 ret
= kvm_s390_get_processor(kvm
, attr
);
1679 case KVM_S390_VM_CPU_MACHINE
:
1680 ret
= kvm_s390_get_machine(kvm
, attr
);
1682 case KVM_S390_VM_CPU_PROCESSOR_FEAT
:
1683 ret
= kvm_s390_get_processor_feat(kvm
, attr
);
1685 case KVM_S390_VM_CPU_MACHINE_FEAT
:
1686 ret
= kvm_s390_get_machine_feat(kvm
, attr
);
1688 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC
:
1689 ret
= kvm_s390_get_processor_subfunc(kvm
, attr
);
1691 case KVM_S390_VM_CPU_MACHINE_SUBFUNC
:
1692 ret
= kvm_s390_get_machine_subfunc(kvm
, attr
);
1698 static int kvm_s390_vm_set_attr(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
1702 switch (attr
->group
) {
1703 case KVM_S390_VM_MEM_CTRL
:
1704 ret
= kvm_s390_set_mem_control(kvm
, attr
);
1706 case KVM_S390_VM_TOD
:
1707 ret
= kvm_s390_set_tod(kvm
, attr
);
1709 case KVM_S390_VM_CPU_MODEL
:
1710 ret
= kvm_s390_set_cpu_model(kvm
, attr
);
1712 case KVM_S390_VM_CRYPTO
:
1713 ret
= kvm_s390_vm_set_crypto(kvm
, attr
);
1715 case KVM_S390_VM_MIGRATION
:
1716 ret
= kvm_s390_vm_set_migration(kvm
, attr
);
1726 static int kvm_s390_vm_get_attr(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
1730 switch (attr
->group
) {
1731 case KVM_S390_VM_MEM_CTRL
:
1732 ret
= kvm_s390_get_mem_control(kvm
, attr
);
1734 case KVM_S390_VM_TOD
:
1735 ret
= kvm_s390_get_tod(kvm
, attr
);
1737 case KVM_S390_VM_CPU_MODEL
:
1738 ret
= kvm_s390_get_cpu_model(kvm
, attr
);
1740 case KVM_S390_VM_MIGRATION
:
1741 ret
= kvm_s390_vm_get_migration(kvm
, attr
);
1751 static int kvm_s390_vm_has_attr(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
1755 switch (attr
->group
) {
1756 case KVM_S390_VM_MEM_CTRL
:
1757 switch (attr
->attr
) {
1758 case KVM_S390_VM_MEM_ENABLE_CMMA
:
1759 case KVM_S390_VM_MEM_CLR_CMMA
:
1760 ret
= sclp
.has_cmma
? 0 : -ENXIO
;
1762 case KVM_S390_VM_MEM_LIMIT_SIZE
:
1770 case KVM_S390_VM_TOD
:
1771 switch (attr
->attr
) {
1772 case KVM_S390_VM_TOD_LOW
:
1773 case KVM_S390_VM_TOD_HIGH
:
1781 case KVM_S390_VM_CPU_MODEL
:
1782 switch (attr
->attr
) {
1783 case KVM_S390_VM_CPU_PROCESSOR
:
1784 case KVM_S390_VM_CPU_MACHINE
:
1785 case KVM_S390_VM_CPU_PROCESSOR_FEAT
:
1786 case KVM_S390_VM_CPU_MACHINE_FEAT
:
1787 case KVM_S390_VM_CPU_MACHINE_SUBFUNC
:
1788 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC
:
1796 case KVM_S390_VM_CRYPTO
:
1797 switch (attr
->attr
) {
1798 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW
:
1799 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW
:
1800 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW
:
1801 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW
:
1804 case KVM_S390_VM_CRYPTO_ENABLE_APIE
:
1805 case KVM_S390_VM_CRYPTO_DISABLE_APIE
:
1806 ret
= ap_instructions_available() ? 0 : -ENXIO
;
1813 case KVM_S390_VM_MIGRATION
:
1824 static long kvm_s390_get_skeys(struct kvm
*kvm
, struct kvm_s390_skeys
*args
)
1828 int srcu_idx
, i
, r
= 0;
1830 if (args
->flags
!= 0)
1833 /* Is this guest using storage keys? */
1834 if (!mm_uses_skeys(current
->mm
))
1835 return KVM_S390_GET_SKEYS_NONE
;
1837 /* Enforce sane limit on memory allocation */
1838 if (args
->count
< 1 || args
->count
> KVM_S390_SKEYS_MAX
)
1841 keys
= kvmalloc_array(args
->count
, sizeof(uint8_t), GFP_KERNEL_ACCOUNT
);
1845 mmap_read_lock(current
->mm
);
1846 srcu_idx
= srcu_read_lock(&kvm
->srcu
);
1847 for (i
= 0; i
< args
->count
; i
++) {
1848 hva
= gfn_to_hva(kvm
, args
->start_gfn
+ i
);
1849 if (kvm_is_error_hva(hva
)) {
1854 r
= get_guest_storage_key(current
->mm
, hva
, &keys
[i
]);
1858 srcu_read_unlock(&kvm
->srcu
, srcu_idx
);
1859 mmap_read_unlock(current
->mm
);
1862 r
= copy_to_user((uint8_t __user
*)args
->skeydata_addr
, keys
,
1863 sizeof(uint8_t) * args
->count
);
1872 static long kvm_s390_set_skeys(struct kvm
*kvm
, struct kvm_s390_skeys
*args
)
1876 int srcu_idx
, i
, r
= 0;
1879 if (args
->flags
!= 0)
1882 /* Enforce sane limit on memory allocation */
1883 if (args
->count
< 1 || args
->count
> KVM_S390_SKEYS_MAX
)
1886 keys
= kvmalloc_array(args
->count
, sizeof(uint8_t), GFP_KERNEL_ACCOUNT
);
1890 r
= copy_from_user(keys
, (uint8_t __user
*)args
->skeydata_addr
,
1891 sizeof(uint8_t) * args
->count
);
1897 /* Enable storage key handling for the guest */
1898 r
= s390_enable_skey();
1903 mmap_read_lock(current
->mm
);
1904 srcu_idx
= srcu_read_lock(&kvm
->srcu
);
1905 while (i
< args
->count
) {
1907 hva
= gfn_to_hva(kvm
, args
->start_gfn
+ i
);
1908 if (kvm_is_error_hva(hva
)) {
1913 /* Lowest order bit is reserved */
1914 if (keys
[i
] & 0x01) {
1919 r
= set_guest_storage_key(current
->mm
, hva
, keys
[i
], 0);
1921 r
= fixup_user_fault(current
->mm
, hva
,
1922 FAULT_FLAG_WRITE
, &unlocked
);
1929 srcu_read_unlock(&kvm
->srcu
, srcu_idx
);
1930 mmap_read_unlock(current
->mm
);
1937 * Base address and length must be sent at the start of each block, therefore
1938 * it's cheaper to send some clean data, as long as it's less than the size of
1941 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1942 /* for consistency */
1943 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1946 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1947 * address falls in a hole. In that case the index of one of the memslots
1948 * bordering the hole is returned.
1950 static int gfn_to_memslot_approx(struct kvm_memslots
*slots
, gfn_t gfn
)
1952 int start
= 0, end
= slots
->used_slots
;
1953 int slot
= atomic_read(&slots
->last_used_slot
);
1954 struct kvm_memory_slot
*memslots
= slots
->memslots
;
1956 if (gfn
>= memslots
[slot
].base_gfn
&&
1957 gfn
< memslots
[slot
].base_gfn
+ memslots
[slot
].npages
)
1960 while (start
< end
) {
1961 slot
= start
+ (end
- start
) / 2;
1963 if (gfn
>= memslots
[slot
].base_gfn
)
1969 if (start
>= slots
->used_slots
)
1970 return slots
->used_slots
- 1;
1972 if (gfn
>= memslots
[start
].base_gfn
&&
1973 gfn
< memslots
[start
].base_gfn
+ memslots
[start
].npages
) {
1974 atomic_set(&slots
->last_used_slot
, start
);
static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			      u8 *res, unsigned long bufsize)
{
	unsigned long pgstev, hva, cur_gfn = args->start_gfn;

	args->count = 0;
	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		/*
		 * We return an error if the first value was invalid, but we
		 * return successfully if at least one value was copied.
		 */
		if (kvm_is_error_hva(hva))
			return args->count ? 0 : -EFAULT;
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		res[args->count++] = (pgstev >> 24) & 0x43;
		cur_gfn++;
	}

	return 0;
}
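/*
 * The mask 0x43 keeps exactly the PGSTE bits that survive the shift by 24:
 * the two usage-state bits (_PGSTE_GPS_USAGE_MASK >> 24 == 0x03) and the
 * NODAT bit (_PGSTE_GPS_NODAT >> 24 == 0x40).
 */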
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
	struct kvm_memory_slot *ms = slots->memslots + slotidx;
	unsigned long ofs = cur_gfn - ms->base_gfn;

	if (ms->base_gfn + ms->npages <= cur_gfn) {
		slotidx--;
		/* If we are above the highest slot, wrap around */
		if (slotidx < 0)
			slotidx = slots->used_slots - 1;

		ms = slots->memslots + slotidx;
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	while ((slotidx > 0) && (ofs >= ms->npages)) {
		slotidx--;
		ms = slots->memslots + slotidx;
		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
	}
	return ms->base_gfn + ofs;
}
static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			     u8 *res, unsigned long bufsize)
{
	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *ms;

	if (unlikely(!slots->used_slots))
		return 0;

	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
	ms = gfn_to_memslot(kvm, cur_gfn);
	args->count = 0;
	args->start_gfn = cur_gfn;
	if (!ms)
		return 0;
	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;

	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		if (kvm_is_error_hva(hva))
			return 0;
		/* Decrement only if we actually flipped the bit to 0 */
		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			atomic64_dec(&kvm->arch.cmma_dirty_pages);
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		/* Save the value */
		res[args->count++] = (pgstev >> 24) & 0x43;
		/* If the next bit is too far away, stop. */
		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
			return 0;
		/* If we reached the previous "next", find the next one */
		if (cur_gfn == next_gfn)
			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
		/* Reached the end of memory or of the buffer, stop */
		if ((next_gfn >= mem_end) ||
		    (next_gfn - args->start_gfn >= bufsize))
			return 0;
		cur_gfn++;
		/* Reached the end of the current memslot, take the next one. */
		if (cur_gfn - ms->base_gfn >= ms->npages) {
			ms = gfn_to_memslot(kvm, cur_gfn);
			if (!ms)
				return 0;
		}
	}
	return 0;
}
/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	unsigned long bufsize;
	int srcu_idx, peek, ret;
	u8 *values;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !kvm->arch.migration_mode)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.uses_cmm) {
		memset(args, 0, sizeof(*args));
		return 0;
	}
	/* We are not peeking, and there are no dirty pages */
	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	values = vmalloc(bufsize);
	if (!values)
		return -ENOMEM;

	mmap_read_lock(kvm->mm);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	if (peek)
		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
	else
		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	mmap_read_unlock(kvm->mm);

	if (kvm->arch.migration_mode)
		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
	else
		args->remaining = 0;

	if (copy_to_user((void __user *)args->values, values, args->count))
		ret = -EFAULT;

	vfree(values);
	return ret;
}
/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.uses_cmm flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(array_size(sizeof(*bits), args->count));
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	mmap_read_lock(kvm->mm);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	mmap_read_unlock(kvm->mm);

	if (!kvm->mm->context.uses_cmm) {
		mmap_write_lock(kvm->mm);
		kvm->mm->context.uses_cmm = 1;
		mmap_write_unlock(kvm->mm);
	}
out:
	vfree(bits);
	return r;
}
static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
{
	struct kvm_vcpu *vcpu;
	unsigned long i;
	u16 rc, rrc;
	int ret = 0;

	/*
	 * We ignore failures and try to destroy as many CPUs as possible.
	 * At the same time we must not free the assigned resources when
	 * this fails, as the ultravisor has still access to that memory.
	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
	 * behind.
	 * We want to return the first failure rc and rrc, though.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		mutex_lock(&vcpu->mutex);
		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
			*rcp = rc;
			*rrcp = rrc;
			ret = -EIO;
		}
		mutex_unlock(&vcpu->mutex);
	}
	/* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
	if (use_gisa)
		kvm_s390_gisa_enable(kvm);
	return ret;
}
static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	unsigned long i;
	int r = 0;
	u16 dummy;

	struct kvm_vcpu *vcpu;

	/* Disable the GISA if the ultravisor does not support AIV. */
	if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
		kvm_s390_gisa_disable(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		mutex_lock(&vcpu->mutex);
		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
		mutex_unlock(&vcpu->mutex);
		if (r)
			break;
	}
	if (r)
		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
	return r;
}
static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
{
	int r = 0;
	u16 dummy;
	void __user *argp = (void __user *)cmd->data;

	switch (cmd->cmd) {
	case KVM_PV_ENABLE: {
		r = -EINVAL;
		if (kvm_s390_pv_is_protected(kvm))
			break;

		/*
		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
		 *  esca, we need no cleanup in the error cases below
		 */
		r = sca_switch_to_extended(kvm);
		if (r)
			break;

		mmap_write_lock(current->mm);
		r = gmap_mark_unmergeable();
		mmap_write_unlock(current->mm);
		if (r)
			break;

		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
		if (r)
			break;

		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
		if (r)
			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);

		/* we need to block service interrupts from now on */
		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
		break;
	}
	case KVM_PV_DISABLE: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
		/*
		 * If a CPU could not be destroyed, destroy VM will also fail.
		 * There is no point in trying to destroy it. Instead return
		 * the rc and rrc from the first CPU that failed destroying.
		 */
		if (r)
			break;
		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);

		/* no need to block service interrupts any more */
		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
		break;
	}
	case KVM_PV_SET_SEC_PARMS: {
		struct kvm_s390_pv_sec_parm parms = {};
		void *hdr;

		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = -EFAULT;
		if (copy_from_user(&parms, argp, sizeof(parms)))
			break;

		/* Currently restricted to 8KB */
		r = -EINVAL;
		if (parms.length > PAGE_SIZE * 2)
			break;

		r = -ENOMEM;
		hdr = vmalloc(parms.length);
		if (!hdr)
			break;

		r = -EFAULT;
		if (!copy_from_user(hdr, (void __user *)parms.origin,
				    parms.length))
			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
						      &cmd->rc, &cmd->rrc);

		vfree(hdr);
		break;
	}
	case KVM_PV_UNPACK: {
		struct kvm_s390_pv_unp unp = {};

		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
			break;

		r = -EFAULT;
		if (copy_from_user(&unp, argp, sizeof(unp)))
			break;

		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
				       &cmd->rc, &cmd->rrc);
		break;
	}
	case KVM_PV_VERIFY: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
			     cmd->rrc);
		break;
	}
	case KVM_PV_PREP_RESET: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
			     cmd->rc, cmd->rrc);
		break;
	}
	case KVM_PV_UNSHARE_ALL: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
			     cmd->rc, cmd->rrc);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
static bool access_key_invalid(u8 access_key)
{
	return access_key > 0xf;
}

static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	u64 supported_flags;
	void *tmpbuf = NULL;
	int r, srcu_idx;

	supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
			  | KVM_S390_MEMOP_F_CHECK_ONLY;
	if (mop->flags & ~supported_flags || !mop->size)
		return -EINVAL;
	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;
	if (kvm_s390_pv_is_protected(kvm))
		return -EINVAL;
	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
		if (access_key_invalid(mop->key))
			return -EINVAL;
	} else {
		mop->key = 0;
	}
	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&kvm->srcu);

	if (kvm_is_error_gpa(kvm, mop->gaddr)) {
		r = PGM_ADDRESSING;
		goto out_unlock;
	}

	switch (mop->op) {
	case KVM_S390_MEMOP_ABSOLUTE_READ: {
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
		} else {
			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
						      mop->size, GACC_FETCH, mop->key);
			if (r == 0) {
				if (copy_to_user(uaddr, tmpbuf, mop->size))
					r = -EFAULT;
			}
		}
		break;
	}
	case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
		} else {
			if (copy_from_user(tmpbuf, uaddr, mop->size)) {
				r = -EFAULT;
				break;
			}
			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
						      mop->size, GACC_STORE, mop->key);
		}
		break;
	}
	default:
		r = -EINVAL;
	}

out_unlock:
	srcu_read_unlock(&kvm->srcu, srcu_idx);

	vfree(tmpbuf);
	return r;
}
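/*
 * Illustrative usage sketch, not part of the original file:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = gpa,
 *		.size  = len,
 *		.op    = KVM_S390_MEMOP_ABSOLUTE_READ,
 *		.buf   = (__u64)(unsigned long)buf,
 *	};
 *	ret = ioctl(vm_fd, KVM_S390_MEM_OP, &op);
 *
 * With KVM_S390_MEMOP_F_CHECK_ONLY set, no data is transferred and only the
 * access (including the storage key, if requested) is validated.
 */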
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_get_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_set_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		break;
	}
	case KVM_S390_PV_COMMAND: {
		struct kvm_pv_cmd args;

		/* protvirt means user cpu state */
		kvm_s390_set_user_cpu_state_ctrl(kvm);
		r = 0;
		if (!is_prot_virt_host()) {
			r = -EINVAL;
			break;
		}
		if (copy_from_user(&args, argp, sizeof(args))) {
			r = -EFAULT;
			break;
		}
		if (args.flags) {
			r = -EINVAL;
			break;
		}
		mutex_lock(&kvm->lock);
		r = kvm_s390_handle_pv(kvm, &args);
		mutex_unlock(&kvm->lock);
		if (copy_to_user(argp, &args, sizeof(args))) {
			r = -EFAULT;
			break;
		}
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_vm_mem_op(kvm, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
static int kvm_s390_apxa_installed(void)
{
	struct ap_config_info info;

	if (ap_instructions_available()) {
		if (ap_qci(&info) == 0)
			return info.apxa;
	}

	return 0;
}
/*
 * The format of the crypto control block (CRYCB) is specified in the 3 low
 * order bits of the CRYCB designation (CRYCBD) field as follows:
 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
 *	     AP extended addressing (APXA) facility are installed.
 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
 * Format 2: Both the APXA and MSAX3 facilities are installed
 */
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	/* Clear the CRYCB format bits - i.e., set format 0 by default */
	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);

	/* Check whether MSAX3 is installed */
	if (!test_kvm_facility(kvm, 76))
		return;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}
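/*
 * Resulting CRYCB format, summarizing the checks above:
 *	no facility 76 (MSAX3)      -> format 0 (bits left cleared)
 *	facility 76, no APXA        -> CRYCB_FORMAT1
 *	facility 76 and APXA        -> CRYCB_FORMAT2
 */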
/*
 * kvm_arch_crypto_set_masks
 *
 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
 *	 to be set.
 * @apm: the mask identifying the accessible AP adapters
 * @aqm: the mask identifying the accessible AP domains
 * @adm: the mask identifying the accessible AP control domains
 *
 * Set the masks that identify the adapters, domains and control domains to
 * which the KVM guest is granted access.
 *
 * Note: The kvm->lock mutex must be locked by the caller before invoking this
 *	 function.
 */
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
			       unsigned long *aqm, unsigned long *adm)
{
	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;

	kvm_s390_vcpu_block_all(kvm);

	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
		memcpy(crycb->apcb1.apm, apm, 32);
		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
			 apm[0], apm[1], apm[2], apm[3]);
		memcpy(crycb->apcb1.aqm, aqm, 32);
		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
			 aqm[0], aqm[1], aqm[2], aqm[3]);
		memcpy(crycb->apcb1.adm, adm, 32);
		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
			 adm[0], adm[1], adm[2], adm[3]);
		break;
	case CRYCB_FORMAT1:
	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
		memcpy(crycb->apcb0.apm, apm, 8);
		memcpy(crycb->apcb0.aqm, aqm, 2);
		memcpy(crycb->apcb0.adm, adm, 2);
		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
			 apm[0], *((unsigned short *)aqm),
			 *((unsigned short *)adm));
		break;
	default:	/* Can not happen */
		break;
	}

	/* recreate the shadow crycb for each vcpu */
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
	kvm_s390_vcpu_unblock_all(kvm);
}
EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
/*
 * kvm_arch_crypto_clear_masks
 *
 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
 *	 to be cleared.
 *
 * Clear the masks that identify the adapters, domains and control domains to
 * which the KVM guest is granted access.
 *
 * Note: The kvm->lock mutex must be locked by the caller before invoking this
 *	 function.
 */
void kvm_arch_crypto_clear_masks(struct kvm *kvm)
{
	kvm_s390_vcpu_block_all(kvm);

	memset(&kvm->arch.crypto.crycb->apcb0, 0,
	       sizeof(kvm->arch.crypto.crycb->apcb0));
	memset(&kvm->arch.crypto.crycb->apcb1, 0,
	       sizeof(kvm->arch.crypto.crycb->apcb1));

	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
	/* recreate the shadow crycb for each vcpu */
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
	kvm_s390_vcpu_unblock_all(kvm);
}
EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}
static void kvm_s390_crypto_init(struct kvm *kvm)
{
	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);
	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);

	if (!test_kvm_facility(kvm, 76))
		return;

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}
static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	/* start with basic SCA */
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	mutex_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	mutex_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	kvm->arch.sie_page2->kvm = kvm;
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;

	for (i = 0; i < kvm_s390_fac_size(); i++) {
		kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
					      (kvm_s390_fac_base[i] |
					       kvm_s390_fac_ext[i]);
		kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
					      kvm_s390_fac_base[i];
	}
	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;

	/* we are always in czam mode - even on pre z14 machines */
	set_kvm_facility(kvm->arch.model.fac_mask, 138);
	set_kvm_facility(kvm->arch.model.fac_list, 138);
	/* we emulate STHYI in kvm */
	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);
	if (MACHINE_HAS_TLB_GUEST) {
		set_kvm_facility(kvm->arch.model.fac_mask, 147);
		set_kvm_facility(kvm->arch.model.fac_list, 147);
	}

	if (css_general_characteristics.aiv && test_facility(65))
		set_kvm_facility(kvm->arch.model.fac_mask, 65);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.use_pfmfi = sclp.has_pfmfi;
	kvm->arch.use_skf = sclp.has_skey;
	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	if (use_gisa)
		kvm_s390_gisa_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	u16 rc, rrc;

	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	/* We can not hold the vcpu mutex here, we are already dying */
	if (kvm_s390_pv_cpu_get_handle(vcpu))
		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
	free_page((unsigned long)(vcpu->arch.sie_block));
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	u16 rc, rrc;

	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	kvm_s390_gisa_destroy(kvm);
	/*
	 * We are already at the end of life and kvm->lock is not taken.
	 * This is ok as the file descriptor is closed by now and nobody
	 * can mess with the pv state. To avoid lockdep_assert_held from
	 * complaining we do not use kvm_s390_pv_is_protected.
	 */
	if (kvm_s390_pv_get_handle(kvm))
		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		return;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	if (kvm->arch.use_esca)
		return 0;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
			return true;
		return false;
	}
	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
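/*
 * Capacity note: a basic SCA has KVM_S390_BSCA_CPU_SLOTS (64) entries and an
 * extended SCA KVM_S390_ESCA_CPU_SLOTS (248), so the switch above is only
 * attempted once a VM grows past CPU id 63 and the hardware (ESCA plus
 * 64-bit SCA origin) supports it.
 */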
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}

static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}

/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
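/*
 * Reader/writer note (interpretation of the code above): the low bit of the
 * seqcount is the write-in-progress flag; masking it off in the retry check
 * makes a reader that sampled an in-flight update spin until the writer on
 * the VCPU thread is done, and the WARN documents the one case that could
 * never make progress (the writer reading its own in-flight value).
 */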
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{

	gmap_enable(vcpu->arch.enabled_gmap);
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);

}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
{
	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
		return true;
	return false;
}

static bool kvm_has_pckmo_ecc(struct kvm *kvm)
{
	/* At least one ECC subfunction must be present */
	return kvm_has_pckmo_subfunc(kvm, 32) ||
	       kvm_has_pckmo_subfunc(kvm, 33) ||
	       kvm_has_pckmo_subfunc(kvm, 34) ||
	       kvm_has_pckmo_subfunc(kvm, 40) ||
	       kvm_has_pckmo_subfunc(kvm, 41);

}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	/*
	 * If the AP instructions are not being interpreted and the MSAX3
	 * facility is not configured for the guest, there is nothing to set up.
	 */
	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
	vcpu->arch.sie_block->eca &= ~ECA_APIE;
	vcpu->arch.sie_block->ecd &= ~ECD_ECC;

	if (vcpu->kvm->arch.crypto.apie)
		vcpu->arch.sie_block->eca |= ECA_APIE;

	/* Set up protected key support */
	if (vcpu->kvm->arch.crypto.aes_kw) {
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
		/* ecc is also wrapped with AES key */
		if (kvm_has_pckmo_ecc(vcpu->kvm))
			vcpu->arch.sie_block->ecd |= ECD_ECC;
	}

	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
}
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;
	return 0;
}
static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;
	u16 uvrc, uvrrc;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
	else if (test_kvm_facility(vcpu->kvm, 8))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= ECB_SRSI;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= ECB_TE;
	if (!kvm_is_ucontrol(vcpu->kvm))
		vcpu->arch.sie_block->ecb |= ECB_SPECI;

	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
	if (test_kvm_facility(vcpu->kvm, 130))
		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= ECA_CEI;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= ECA_IB;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= ECA_SII;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= ECA_SIGPI;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= ECA_VX;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
	}
	if (test_kvm_facility(vcpu->kvm, 139))
		vcpu->arch.sie_block->ecd |= ECD_MEF;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
	if (vcpu->arch.sie_block->gd) {
		vcpu->arch.sie_block->eca |= ECA_AIV;
		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
	}
	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
					| SDNXC;
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;

	if (sclp.has_kss)
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
	else
		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	vcpu->arch.sie_block->hpid = HPID_KVM;

	kvm_s390_vcpu_crypto_setup(vcpu);

	mutex_lock(&vcpu->kvm->lock);
	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
		if (rc)
			kvm_s390_vcpu_unsetup_cmma(vcpu);
	}
	mutex_unlock(&vcpu->kvm->lock);

	return rc;
}
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		return -EINVAL;
	return 0;
}
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
	struct sie_page *sie_page;
	int rc;

	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
	if (!sie_page)
		return -ENOMEM;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
	seqcount_init(&vcpu->arch.cputm_seqcount);

	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT |
				    KVM_SYNC_DIAG318;
	kvm_s390_set_prefix(vcpu, 0);
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 82))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
	if (test_kvm_facility(vcpu->kvm, 133))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm)) {
		rc = __kvm_ucontrol_vcpu_init(vcpu);
		if (rc)
			goto out_free_sie_block;
	}

	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);

	rc = kvm_s390_vcpu_setup(vcpu);
	if (rc)
		goto out_ucontrol_uninit;
	return 0;

out_ucontrol_uninit:
	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
	return rc;
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->prog20) &
	       (PROG_BLOCK_SIE | PROG_REQUEST);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately. */
void exit_sie(struct kvm_vcpu *vcpu)
{
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
	kvm_s390_vsie_kick(vcpu);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
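/*
 * Kick protocol, as implemented above: PROG_REQUEST is set before the kick
 * so that the SIE entry path observes it, exit_sie() raises the stop
 * interrupt and then busy-waits on PROG_IN_SIE until the CPU has actually
 * left (v)SIE, making kvm_s390_sync_request() synchronous.
 */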
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	unsigned long i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}
bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
{
	/* do not poll with more than halt_poll_max_steal percent of steal time */
	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
	    READ_ONCE(halt_poll_max_steal)) {
		vcpu->stat.halt_no_poll_steal++;
		return true;
	}
	return false;
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));

	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	/* Initial reset is a superset of the normal reset */
	kvm_arch_vcpu_ioctl_normal_reset(vcpu);

	/*
	 * This equals initial cpu reset in pop, but we don't switch to ESA.
	 * We do not only reset the internal data, but also ...
	 */
	vcpu->arch.sie_block->gpsw.mask = 0;
	vcpu->arch.sie_block->gpsw.addr = 0;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0;
	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;

	/* ... the data in sync regs */
	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
	vcpu->run->s.regs.ckc = 0;
	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
	vcpu->run->psw_addr = 0;
	vcpu->run->psw_mask = 0;
	vcpu->run->s.regs.todpr = 0;
	vcpu->run->s.regs.cputm = 0;
	vcpu->run->s.regs.ckc = 0;
	vcpu->run->s.regs.pp = 0;
	vcpu->run->s.regs.gbea = 1;
	vcpu->run->s.regs.fpc = 0;
	/*
	 * Do not reset these registers in the protected case, as some of
	 * them are overlayed and they are not accessible in this case
	 * anyway.
	 */
	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
		vcpu->arch.sie_block->gbea = 1;
		vcpu->arch.sie_block->pp = 0;
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->todpr = 0;
	}
}
static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Clear reset is a superset of the initial reset */
	kvm_arch_vcpu_ioctl_initial_reset(vcpu);

	memset(&regs->gprs, 0, sizeof(regs->gprs));
	memset(&regs->vrs, 0, sizeof(regs->vrs));
	memset(&regs->acrs, 0, sizeof(regs->acrs));
	memset(&regs->gscb, 0, sizeof(regs->gscb));

	regs->etoken = 0;
	regs->etoken_extension = 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	int ret = 0;

	vcpu_load(vcpu);

	if (test_fp_ctl(fpu->fpc)) {
		ret = -EINVAL;
		goto out;
	}
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));

out:
	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);

	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;

	vcpu_put(vcpu);
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu_load(vcpu);

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
		rc = -EINVAL;
		goto out;
	}
	if (!sclp.has_gpere) {
		rc = -EINVAL;
		goto out;
	}

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
	}

out:
	vcpu_put(vcpu);
	return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret;

	vcpu_load(vcpu);

	/* CHECK_STOP and LOAD are not supported yet */
	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				      KVM_MP_STATE_OPERATING;

	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	vcpu_load(vcpu);

	/* user space knows about this interface - let it control the state */
	kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		rc = kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		rc = kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
			rc = -ENXIO;
			break;
		}
		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
		break;
	case KVM_MP_STATE_CHECK_STOP:
		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	vcpu_put(vcpu);
	return rc;
}
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;

		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMM virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMM virtualization if CMMA is available and
		 * CMM has been used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.uses_cmm))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
	/* we left the vsie handler, nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);

	return 0;
}
void kvm_s390_set_tod_clock(struct kvm *kvm,
			    const struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_vcpu *vcpu;
	union tod_clock clk;
	unsigned long i;

	mutex_lock(&kvm->lock);
	preempt_disable();

	store_tod_clock_ext(&clk);

	kvm->arch.epoch = gtod->tod - clk.tod;
	kvm->arch.epdx = 0;
	if (test_kvm_facility(kvm, 139)) {
		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
		if (kvm->arch.epoch > gtod->tod)
			kvm->arch.epdx -= 1;
	}

	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
	}

	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);

	return true;
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}

static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return false;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return false;
	if (psw_extint_disabled(vcpu))
		return false;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return false;
	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
		return false;
	if (!vcpu->arch.gmap->pfault_enabled)
		return false;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return false;

	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
}
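/*
 * Pfault handshake, as implemented above and in __kvm_inject_pfault_token():
 * the token is read from the guest page designated by pfault_token, a
 * PFAULT_INIT external interrupt is injected when async resolution starts,
 * and PFAULT_DONE once the page has been made available.
 */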
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the house keeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	struct mcck_volatile_info *mcck_info;
	struct sie_page *sie_page;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (exit_reason == -EINTR) {
		VCPU_EVENT(vcpu, 3, "%s", "machine check");
		sie_page = container_of(vcpu->arch.sie_block,
					struct sie_page, sie_block);
		mcck_info = &sie_page->mcck_info;
		kvm_s390_reinject_machine_check(vcpu, mcck_info);
		return 0;
	}

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		vcpu->stat.pfault_sync++;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;
	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			memcpy(sie_page->pv_grregs,
			       vcpu->run->s.regs.gprs,
			       sizeof(sie_page->pv_grregs));
		}
		if (test_cpu_flag(CIF_FPU))
			load_fpu_regs();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			memcpy(vcpu->run->s.regs.gprs,
			       sie_page->pv_grregs,
			       sizeof(sie_page->pv_grregs));
			/*
			 * We're not allowed to inject interrupts on intercepts
			 * that leave the guest state in an "in-between" state
			 * where the next SIE entry will do a continuation.
			 * Fence interrupts in our "internal" PSW.
			 */
			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
			}
		}
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
		VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
	    test_kvm_facility(vcpu->kvm, 82)) {
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
	}
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}
	/* SIE will load etoken directly from SDNX and therefore kvm_run */
}
static void sync_regs(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	/* Sync fmt2 only data */
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
		sync_regs_fmt2(vcpu);
	} else {
		/*
		 * In several places we have to modify our internal view to
		 * not do things that are disallowed by the ultravisor. For
		 * example we must not inject interrupts after specific exits
		 * (e.g. 112 prefix page not secure). We do this by turning
		 * off the machine check, external and I/O interrupt bits
		 * of our PSW copy. To avoid getting validity intercepts, we
		 * do only accept the condition code from userspace.
		 */
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
						   PSW_MASK_CC;
	}

	kvm_run->kvm_dirty_regs = 0;
}
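/*
 * Userspace view (illustrative sketch, not part of this file): with the
 * sync-regs interface, a VMM updates the shared kvm_run area and flags the
 * dirty pieces instead of issuing separate SET_*REGS ioctls, e.g.:
 *
 *	struct kvm_run *run = ...;	// mmap()ed from the vcpu fd
 *
 *	run->s.regs.prefix = new_prefix;
 *	run->kvm_dirty_regs |= KVM_SYNC_PREFIX;
 *	ioctl(vcpu_fd, KVM_RUN, 0);	// sync_regs() consumes the dirty bits
 */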
static void store_regs_fmt2(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
		preempt_enable();
	}
	/* SIE will save etoken directly into SDNX and therefore kvm_run */
}
static void store_regs(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
		store_regs_fmt2(vcpu);
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;

	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
		return -EINVAL;

	vcpu_load(vcpu);

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
		goto out;
	}

	kvm_sigset_activate(vcpu);

	/*
	 * no need to check the return value of vcpu_start as it can only have
	 * an error for protvirt, but protvirt means user cpu state
	 */
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		rc = -EINVAL;
		goto out;
	}

	sync_regs(vcpu);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}
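/*
 * A minimal userspace run loop for reference (sketch only; error handling
 * and full exit-reason dispatch are application specific):
 *
 *	for (;;) {
 *		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0 && errno != EINTR)
 *			err(1, "KVM_RUN");
 *		switch (run->exit_reason) {
 *		case KVM_EXIT_S390_SIEIC:
 *			// handle run->s390_sieic.{icptcode,ipa,ipb}
 *			break;
 *		case KVM_EXIT_INTR:
 *			break;	// interrupted by a signal, just retry
 *		...
 *		}
 *	}
 */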
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else {
		gpa -= __LC_FPREGS_SAVE_AREA;
	}

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
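/*
 * Usage note: userspace passes the target address directly as the ioctl
 * argument (see KVM_S390_STORE_STATUS below), so storing status at the
 * architected 0x1200 save area is simply (sketch):
 *
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_NOADDR);
 */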
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, r = 0, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return 0;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Let's tell the UV that we want to change into the operating state */
	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
		if (r) {
			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
			return r;
		}
	}

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * The real PSW might have changed due to a RESTART interpreted by the
	 * ultravisor. We block all interrupts and let the next sie exit
	 * refresh our view.
	 */
	if (kvm_s390_pv_cpu_is_protected(vcpu))
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return 0;
}
int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, r = 0, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return 0;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Let's tell the UV that we want to change into the stopped state */
	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
		if (r) {
			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
			return r;
		}
	}

	/*
	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
	 * have been fully processed. This will ensure that the VCPU
	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
	 */
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	kvm_s390_clear_stop_irq(vcpu);

	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return 0;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
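/*
 * Userspace enables this per-vcpu capability through KVM_ENABLE_CAP
 * (sketch; cap->flags must stay zero):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_CSS_SUPPORT };
 *
 *	ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
 */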
static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	int r = 0;

	if (mop->flags || !mop->size)
		return -EINVAL;
	if (mop->size + mop->sida_offset < mop->size)
		return -EINVAL;
	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
		return -E2BIG;
	if (!kvm_s390_pv_cpu_is_protected(vcpu))
		return -EINVAL;

	switch (mop->op) {
	case KVM_S390_MEMOP_SIDA_READ:
		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
				 mop->sida_offset), mop->size))
			r = -EFAULT;

		break;
	case KVM_S390_MEMOP_SIDA_WRITE:
		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
				   mop->sida_offset), uaddr, mop->size))
			r = -EFAULT;
		break;
	}
	return r;
}
static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
				 struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r = 0;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY
				    | KVM_S390_MEMOP_F_SKEY_PROTECTION;

	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
		return -EINVAL;
	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;
	if (kvm_s390_pv_cpu_is_protected(vcpu))
		return -EINVAL;
	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
		if (access_key_invalid(mop->key))
			return -EINVAL;
	} else {
		mop->key = 0;
	}
	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
					    GACC_FETCH, mop->key);
			break;
		}
		r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
					mop->size, mop->key);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
					    GACC_STORE, mop->key);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
					 mop->size, mop->key);
		break;
	default:
		r = -EINVAL;
	}

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
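/*
 * Illustrative userspace use of KVM_S390_MEM_OP (sketch; assumes an open
 * vcpu fd and a guest logical address in gaddr):
 *
 *	char buf[512];
 *	struct kvm_s390_mem_op mop = {
 *		.gaddr = gaddr,
 *		.buf = (__u64)(unsigned long)buf,
 *		.size = sizeof(buf),
 *		.op = KVM_S390_MEMOP_LOGICAL_READ,
 *		.ar = 0,
 *	};
 *
 *	// the ioctl returns > 0 for guest access exceptions and
 *	// -1 with errno set for host-side failures
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop);
 */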
static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
				     struct kvm_s390_mem_op *mop)
{
	int r, srcu_idx;

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		r = kvm_s390_vcpu_mem_op(vcpu, mop);
		break;
	case KVM_S390_MEMOP_SIDA_READ:
	case KVM_S390_MEMOP_SIDA_WRITE:
		/* we are locked against sida going away by the vcpu->mutex */
		r = kvm_s390_vcpu_sida_op(vcpu, mop);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	return r;
}
long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq = {};

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	}
	return -ENOIOCTLCMD;
}
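/*
 * Illustrative injection from userspace (sketch): queueing a CPU restart
 * through the modern KVM_S390_IRQ interface would look roughly like:
 *
 *	struct kvm_s390_irq irq = { .type = KVM_S390_RESTART };
 *
 *	ioctl(vcpu_fd, KVM_S390_IRQ, &irq);
 */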
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;
	u16 rc, rrc;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_store_status_unloaded(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_CLEAR_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_INITIAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_INITIAL,
					  &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_NORMAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EINVAL;
		if (kvm_s390_pv_cpu_is_protected(vcpu))
			break;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}
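/*
 * For user-controlled VMs, userspace reaches the SIE control block by
 * mmap()ing the vcpu fd at KVM_S390_SIE_PAGE_OFFSET (sketch, assuming a
 * 4K page size):
 *
 *	void *sie = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
 *			 vcpu_fd, KVM_S390_SIE_PAGE_OFFSET * 4096);
 */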
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end at a
	   segment boundary (1MB). The memory in userland may be fragmented
	   across multiple vmas. It is okay to mmap() and munmap() stuff
	   in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	/* When we are protected, we should not change the memory slots */
	if (kvm_s390_pv_get_handle(kvm))
		return -EINVAL;
	return 0;
}
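/*
 * Consequently, the backing user address and the slot size must both be
 * 1MB aligned (sketch of a conforming registration from userspace):
 *
 *	struct kvm_userspace_memory_region region = {
 *		.slot = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size = 256 << 20,		// multiple of 1MB
 *		.userspace_addr = (__u64)backing,	// 1MB aligned
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 */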
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc = 0;

	switch (change) {
	case KVM_MR_DELETE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		break;
	case KVM_MR_MOVE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		if (rc)
			break;
		fallthrough;
	case KVM_MR_CREATE:
		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
				      mem->guest_phys_addr, mem->memory_size);
		break;
	case KVM_MR_FLAGS_ONLY:
		break;
	default:
		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
	}
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}
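/*
 * Worked example: hmfai is scanned two bits at a time, so for i = 0 the
 * expression (sclp.hmfai << 0) >> 30 yields the top two bits. A nonhyp_fai
 * of 0 keeps all 48 facility bits (0x0000ffffffffffffUL); each increment
 * shifts 16 more facility bits away (1 -> 32 bits kept, 2 -> 16, 3 -> 0).
 */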
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}
static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE is not available\n");
		return -ENODEV;
	}

	if (nested && hpage) {
		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");