arch/s390/kvm/vsie.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * kvm nested virtualization support for s390x
4 *
5 * Copyright IBM Corp. 2016
6 *
7 * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
8 */
9 #include <linux/vmalloc.h>
10 #include <linux/kvm_host.h>
11 #include <linux/bug.h>
12 #include <linux/list.h>
13 #include <linux/bitmap.h>
14 #include <linux/sched/signal.h>
15
16 #include <asm/gmap.h>
17 #include <asm/mmu_context.h>
18 #include <asm/sclp.h>
19 #include <asm/nmi.h>
20 #include <asm/dis.h>
21 #include "kvm-s390.h"
22 #include "gaccess.h"
23
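/*
 * One vsie_page backs exactly one shadow SIE control block and occupies a
 * single page (see the BUILD_BUG_ON in kvm_s390_handle_vsie()). Besides the
 * shadow scb it holds the pinned pointer to the original g2 scb, the shadow
 * gmap currently in use and the scratch areas (crycb, facility list) the
 * shadow scb may point into.
 */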
24 struct vsie_page {
25 struct kvm_s390_sie_block scb_s; /* 0x0000 */
26 /*
27 * the backup info for machine check. ensure it's at
28 * the same offset as that in struct sie_page!
29 */
30 struct mcck_volatile_info mcck_info; /* 0x0200 */
31 /* the pinned original scb */
32 struct kvm_s390_sie_block *scb_o; /* 0x0218 */
33 /* the shadow gmap in use by the vsie_page */
34 struct gmap *gmap; /* 0x0220 */
35 /* address of the last reported fault to guest2 */
36 unsigned long fault_addr; /* 0x0228 */
37 __u8 reserved[0x0700 - 0x0230]; /* 0x0230 */
38 struct kvm_s390_crypto_cb crycb; /* 0x0700 */
39 __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
40 };
41
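/*
 * A validity interception tells g2 that the scb it provided contains a
 * setting we cannot (or do not want to) virtualize; the reason code is
 * placed in the upper halfword of ipb.
 */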
42 /* trigger a validity icpt for the given scb */
43 static int set_validity_icpt(struct kvm_s390_sie_block *scb,
44 __u16 reason_code)
45 {
46 scb->ipa = 0x1000;
47 scb->ipb = ((__u32) reason_code) << 16;
48 scb->icptcode = ICPT_VALIDITY;
49 return 1;
50 }
51
52 /* mark the prefix as unmapped, this will block the VSIE */
53 static void prefix_unmapped(struct vsie_page *vsie_page)
54 {
55 atomic_or(PROG_REQUEST, &vsie_page->scb_s.prog20);
56 }
57
58 /* mark the prefix as unmapped and wait until the VSIE has been left */
59 static void prefix_unmapped_sync(struct vsie_page *vsie_page)
60 {
61 prefix_unmapped(vsie_page);
62 if (vsie_page->scb_s.prog0c & PROG_IN_SIE)
63 atomic_or(CPUSTAT_STOP_INT, &vsie_page->scb_s.cpuflags);
64 while (vsie_page->scb_s.prog0c & PROG_IN_SIE)
65 cpu_relax();
66 }
67
68 /* mark the prefix as mapped, this will allow the VSIE to run */
69 static void prefix_mapped(struct vsie_page *vsie_page)
70 {
71 atomic_andnot(PROG_REQUEST, &vsie_page->scb_s.prog20);
72 }
73
74 /* test if the prefix is mapped into the gmap shadow */
75 static int prefix_is_mapped(struct vsie_page *vsie_page)
76 {
77 return !(atomic_read(&vsie_page->scb_s.prog20) & PROG_REQUEST);
78 }
79
80 /* copy the updated intervention request bits into the shadow scb */
81 static void update_intervention_requests(struct vsie_page *vsie_page)
82 {
83 const int bits = CPUSTAT_STOP_INT | CPUSTAT_IO_INT | CPUSTAT_EXT_INT;
84 int cpuflags;
85
86 cpuflags = atomic_read(&vsie_page->scb_o->cpuflags);
87 atomic_andnot(bits, &vsie_page->scb_s.cpuflags);
88 atomic_or(cpuflags & bits, &vsie_page->scb_s.cpuflags);
89 }
90
91 /* shadow (filter and validate) the cpuflags */
92 static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
93 {
94 struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
95 struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
96 int newflags, cpuflags = atomic_read(&scb_o->cpuflags);
97
98 /* we don't allow ESA/390 guests */
99 if (!(cpuflags & CPUSTAT_ZARCH))
100 return set_validity_icpt(scb_s, 0x0001U);
101
102 if (cpuflags & (CPUSTAT_RRF | CPUSTAT_MCDS))
103 return set_validity_icpt(scb_s, 0x0001U);
104 else if (cpuflags & (CPUSTAT_SLSV | CPUSTAT_SLSR))
105 return set_validity_icpt(scb_s, 0x0007U);
106
107 /* intervention requests will be set later */
108 newflags = CPUSTAT_ZARCH;
109 if (cpuflags & CPUSTAT_GED && test_kvm_facility(vcpu->kvm, 8))
110 newflags |= CPUSTAT_GED;
111 if (cpuflags & CPUSTAT_GED2 && test_kvm_facility(vcpu->kvm, 78)) {
112 if (cpuflags & CPUSTAT_GED)
113 return set_validity_icpt(scb_s, 0x0001U);
114 newflags |= CPUSTAT_GED2;
115 }
116 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GPERE))
117 newflags |= cpuflags & CPUSTAT_P;
118 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GSLS))
119 newflags |= cpuflags & CPUSTAT_SM;
120 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IBS))
121 newflags |= cpuflags & CPUSTAT_IBS;
122 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_KSS))
123 newflags |= cpuflags & CPUSTAT_KSS;
124
125 atomic_set(&scb_s->cpuflags, newflags);
126 return 0;
127 }
128
129 /*
130 * Create a shadow copy of the crycb block and setup key wrapping, if
131 * requested for guest 3 and enabled for guest 2.
132 *
133 * We only accept format-1 (no AP in g2), but convert it into format-2.
134 * There is nothing to do for format-0.
135 *
136 * Returns: - 0 if shadowed or nothing to do
137 * - > 0 if control has to be given to guest 2
138 */
139 static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
140 {
141 struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
142 struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
143 u32 crycb_addr = scb_o->crycbd & 0x7ffffff8U;
144 unsigned long *b1, *b2;
145 u8 ecb3_flags;
146
147 scb_s->crycbd = 0;
148 if (!(scb_o->crycbd & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1))
149 return 0;
150 /* format-1 is supported with message-security-assist extension 3 */
151 if (!test_kvm_facility(vcpu->kvm, 76))
152 return 0;
153 /* we may only allow it if enabled for guest 2 */
154 ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 &
155 (ECB3_AES | ECB3_DEA);
156 if (!ecb3_flags)
157 return 0;
158
159 if ((crycb_addr & PAGE_MASK) != ((crycb_addr + 128) & PAGE_MASK))
160 return set_validity_icpt(scb_s, 0x003CU);
161 else if (!crycb_addr)
162 return set_validity_icpt(scb_s, 0x0039U);
163
164 /* copy only the wrapping keys */
165 if (read_guest_real(vcpu, crycb_addr + 72, &vsie_page->crycb, 56))
166 return set_validity_icpt(scb_s, 0x0035U);
167
168 scb_s->ecb3 |= ecb3_flags;
169 scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT1 |
170 CRYCB_FORMAT2;
171
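/*
 * The shadow crycb has to carry the combination of the masks requested by
 * g3 and the masks already used by g2, so that the wrapping keys effective
 * for g3 are derived relative to g2's keys; hence the XOR below.
 */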
172 /* xor both blocks in one run */
173 b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask;
174 b2 = (unsigned long *)
175 vcpu->kvm->arch.crypto.crycb->dea_wrapping_key_mask;
176 /* as 56%8 == 0, bitmap_xor won't overwrite any data */
177 bitmap_xor(b1, b1, b2, BITS_PER_BYTE * 56);
178 return 0;
179 }
180
181 /* shadow (round up/down) the ibc to avoid validity icpt */
182 static void prepare_ibc(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
183 {
184 struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
185 struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
186 __u64 min_ibc = (sclp.ibc >> 16) & 0x0fffU;
187
188 scb_s->ibc = 0;
189 /* ibc installed in g2 and requested for g3 */
190 if (vcpu->kvm->arch.model.ibc && (scb_o->ibc & 0x0fffU)) {
191 scb_s->ibc = scb_o->ibc & 0x0fffU;
192 /* take care of the minimum ibc level of the machine */
193 if (scb_s->ibc < min_ibc)
194 scb_s->ibc = min_ibc;
195 /* take care of the maximum ibc level set for the guest */
196 if (scb_s->ibc > vcpu->kvm->arch.model.ibc)
197 scb_s->ibc = vcpu->kvm->arch.model.ibc;
198 }
199 }
200
201 /* unshadow the scb, copying parameters back to the real scb */
202 static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
203 {
204 struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
205 struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
206
207 /* interception */
208 scb_o->icptcode = scb_s->icptcode;
209 scb_o->icptstatus = scb_s->icptstatus;
210 scb_o->ipa = scb_s->ipa;
211 scb_o->ipb = scb_s->ipb;
212 scb_o->gbea = scb_s->gbea;
213
214 /* timer */
215 scb_o->cputm = scb_s->cputm;
216 scb_o->ckc = scb_s->ckc;
217 scb_o->todpr = scb_s->todpr;
218
219 /* guest state */
220 scb_o->gpsw = scb_s->gpsw;
221 scb_o->gg14 = scb_s->gg14;
222 scb_o->gg15 = scb_s->gg15;
223 memcpy(scb_o->gcr, scb_s->gcr, 128);
224 scb_o->pp = scb_s->pp;
225
226 /* interrupt intercept */
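/*
 * The area at 0xc0 in the SIE block carries the per-interception parameters
 * g2 needs in order to handle these intercepts; for the MVPG partial
 * execution case only the first part (up to 0xd0) is relevant.
 */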
227 switch (scb_s->icptcode) {
228 case ICPT_PROGI:
229 case ICPT_INSTPROGI:
230 case ICPT_EXTINT:
231 memcpy((void *)((u64)scb_o + 0xc0),
232 (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
233 break;
234 case ICPT_PARTEXEC:
235 /* MVPG only */
236 memcpy((void *)((u64)scb_o + 0xc0),
237 (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0);
238 break;
239 }
240
241 if (scb_s->ihcpu != 0xffffU)
242 scb_o->ihcpu = scb_s->ihcpu;
243 }
244
245 /*
246 * Set up the shadow scb by copying and checking the relevant parts of the
247 * scb provided by g2.
248 *
249 * Returns: - 0 if the scb has been shadowed
250 * - > 0 if control has to be given to guest 2
251 */
252 static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
253 {
254 struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
255 struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
256 bool had_tx = scb_s->ecb & ECB_TE;
257 unsigned long new_mso = 0;
258 int rc;
259
260 /* make sure we don't have any leftovers when reusing the scb */
261 scb_s->icptcode = 0;
262 scb_s->eca = 0;
263 scb_s->ecb = 0;
264 scb_s->ecb2 = 0;
265 scb_s->ecb3 = 0;
266 scb_s->ecd = 0;
267 scb_s->fac = 0;
268
269 rc = prepare_cpuflags(vcpu, vsie_page);
270 if (rc)
271 goto out;
272
273 /* timer */
274 scb_s->cputm = scb_o->cputm;
275 scb_s->ckc = scb_o->ckc;
276 scb_s->todpr = scb_o->todpr;
277 scb_s->epoch = scb_o->epoch;
278
279 /* guest state */
280 scb_s->gpsw = scb_o->gpsw;
281 scb_s->gg14 = scb_o->gg14;
282 scb_s->gg15 = scb_o->gg15;
283 memcpy(scb_s->gcr, scb_o->gcr, 128);
284 scb_s->pp = scb_o->pp;
285
286 /* interception / execution handling */
287 scb_s->gbea = scb_o->gbea;
288 scb_s->lctl = scb_o->lctl;
289 scb_s->svcc = scb_o->svcc;
290 scb_s->ictl = scb_o->ictl;
291 /*
292 * SKEY handling functions can't deal with false setting of PTE invalid
293 * bits. Therefore we cannot provide interpretation and would later
294 * have to provide our own emulation handlers.
295 */
296 if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_KSS))
297 scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
298
299 scb_s->icpua = scb_o->icpua;
300
301 if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM))
302 new_mso = scb_o->mso & 0xfffffffffff00000UL;
303 /* if the location of the prefix changes, we have to remap the prefix */
304 if (scb_s->mso != new_mso || scb_s->prefix != scb_o->prefix)
305 prefix_unmapped(vsie_page);
306 /* SIE will do mso/msl validity and exception checks for us */
307 scb_s->msl = scb_o->msl & 0xfffffffffff00000UL;
308 scb_s->mso = new_mso;
309 scb_s->prefix = scb_o->prefix;
310
311 /* We definitely have to flush the TLB if this scb never ran */
312 if (scb_s->ihcpu != 0xffffU)
313 scb_s->ihcpu = scb_o->ihcpu;
314
315 /* MVPG and Protection Exception Interpretation are always available */
316 scb_s->eca |= scb_o->eca & (ECA_MVPGI | ECA_PROTEXCI);
317 /* Host-protection-interruption introduced with ESOP */
318 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
319 scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
320 /* transactional execution */
321 if (test_kvm_facility(vcpu->kvm, 73)) {
322 /* remap the prefix if tx is toggled on */
323 if ((scb_o->ecb & ECB_TE) && !had_tx)
324 prefix_unmapped(vsie_page);
325 scb_s->ecb |= scb_o->ecb & ECB_TE;
326 }
327 /* SIMD */
328 if (test_kvm_facility(vcpu->kvm, 129)) {
329 scb_s->eca |= scb_o->eca & ECA_VX;
330 scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
331 }
332 /* Run-time-Instrumentation */
333 if (test_kvm_facility(vcpu->kvm, 64))
334 scb_s->ecb3 |= scb_o->ecb3 & ECB3_RI;
335 /* Instruction Execution Prevention */
336 if (test_kvm_facility(vcpu->kvm, 130))
337 scb_s->ecb2 |= scb_o->ecb2 & ECB2_IEP;
338 /* Guarded Storage */
339 if (test_kvm_facility(vcpu->kvm, 133)) {
340 scb_s->ecb |= scb_o->ecb & ECB_GS;
341 scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
342 }
343 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF))
344 scb_s->eca |= scb_o->eca & ECA_SII;
345 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB))
346 scb_s->eca |= scb_o->eca & ECA_IB;
347 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
348 scb_s->eca |= scb_o->eca & ECA_CEI;
349 /* Epoch Extension */
350 if (test_kvm_facility(vcpu->kvm, 139))
351 scb_s->ecd |= scb_o->ecd & ECD_MEF;
352
353 prepare_ibc(vcpu, vsie_page);
354 rc = shadow_crycb(vcpu, vsie_page);
355 out:
356 if (rc)
357 unshadow_scb(vcpu, vsie_page);
358 return rc;
359 }
360
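/*
 * gmap notifier callback: called whenever mappings of a shadow gmap are
 * invalidated. If the prefix pages of one of the shadow control blocks lie
 * in the affected range, block the vsie and force the prefix to be remapped
 * before the next entry.
 */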
361 void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
362 unsigned long end)
363 {
364 struct kvm *kvm = gmap->private;
365 struct vsie_page *cur;
366 unsigned long prefix;
367 struct page *page;
368 int i;
369
370 if (!gmap_is_shadow(gmap))
371 return;
372 if (start >= 1UL << 31)
373 /* We are only interested in prefix pages */
374 return;
375
376 /*
377 * Only new shadow blocks are added to the list during runtime,
378 * therefore we can safely reference them all the time.
379 */
380 for (i = 0; i < kvm->arch.vsie.page_count; i++) {
381 page = READ_ONCE(kvm->arch.vsie.pages[i]);
382 if (!page)
383 continue;
384 cur = page_to_virt(page);
385 if (READ_ONCE(cur->gmap) != gmap)
386 continue;
387 prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
388 /* with mso/msl, the prefix lies at an offset */
389 prefix += cur->scb_s.mso;
390 if (prefix <= end && start <= prefix + 2 * PAGE_SIZE - 1)
391 prefix_unmapped_sync(cur);
392 }
393 }
394
395 /*
396 * Map the first prefix page and, if tx is enabled, also the second prefix page.
397 *
398 * The prefix will be protected; a gmap notifier will inform us about unmaps.
399 * The shadow scb must not be executed until the prefix is remapped; this is
400 * guaranteed by properly handling PROG_REQUEST.
401 *
402 * Returns: - 0 if successfully mapped or already mapped
403 * - > 0 if control has to be given to guest 2
404 * - -EAGAIN if the caller can retry immediately
405 * - -ENOMEM if out of memory
406 */
407 static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
408 {
409 struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
410 u64 prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;
411 int rc;
412
413 if (prefix_is_mapped(vsie_page))
414 return 0;
415
416 /* mark it as mapped so we can catch any concurrent unmappers */
417 prefix_mapped(vsie_page);
418
419 /* with mso/msl, the prefix lies at offset *mso* */
420 prefix += scb_s->mso;
421
422 rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
423 if (!rc && (scb_s->ecb & ECB_TE))
424 rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
425 prefix + PAGE_SIZE);
426 /*
427 * We don't have to mprotect, as we will be called for all unshadows.
428 * SIE will detect if protection applies and trigger a validity.
429 */
430 if (rc)
431 prefix_unmapped(vsie_page);
432 if (rc > 0 || rc == -EFAULT)
433 rc = set_validity_icpt(scb_s, 0x0037U);
434 return rc;
435 }
436
437 /*
438 * Pin the guest page given by gpa and set hpa to the pinned host address.
439 * Will always be pinned writable.
440 *
441 * Returns: - 0 on success
442 * - -EINVAL if the gpa is not valid guest storage
443 */
444 static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
445 {
446 struct page *page;
447
448 page = gfn_to_page(kvm, gpa_to_gfn(gpa));
449 if (is_error_page(page))
450 return -EINVAL;
451 *hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK);
452 return 0;
453 }
454
455 /* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
456 static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
457 {
458 kvm_release_pfn_dirty(hpa >> PAGE_SHIFT);
459 /* always mark the page as dirty for migration */
460 mark_page_dirty(kvm, gpa_to_gfn(gpa));
461 }
462
463 /* unpin all blocks previously pinned by pin_blocks(), marking them dirty */
464 static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
465 {
466 struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
467 struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
468 hpa_t hpa;
469 gpa_t gpa;
470
471 hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol;
472 if (hpa) {
473 gpa = scb_o->scaol & ~0xfUL;
474 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
475 gpa |= (u64) scb_o->scaoh << 32;
476 unpin_guest_page(vcpu->kvm, gpa, hpa);
477 scb_s->scaol = 0;
478 scb_s->scaoh = 0;
479 }
480
481 hpa = scb_s->itdba;
482 if (hpa) {
483 gpa = scb_o->itdba & ~0xffUL;
484 unpin_guest_page(vcpu->kvm, gpa, hpa);
485 scb_s->itdba = 0;
486 }
487
488 hpa = scb_s->gvrd;
489 if (hpa) {
490 gpa = scb_o->gvrd & ~0x1ffUL;
491 unpin_guest_page(vcpu->kvm, gpa, hpa);
492 scb_s->gvrd = 0;
493 }
494
495 hpa = scb_s->riccbd;
496 if (hpa) {
497 gpa = scb_o->riccbd & ~0x3fUL;
498 unpin_guest_page(vcpu->kvm, gpa, hpa);
499 scb_s->riccbd = 0;
500 }
501
502 hpa = scb_s->sdnxo;
503 if (hpa) {
504 gpa = scb_o->sdnxo;
505 unpin_guest_page(vcpu->kvm, gpa, hpa);
506 scb_s->sdnxo = 0;
507 }
508 }
509
510 /*
511 * Instead of shadowing some blocks, we can simply forward them because the
512 * addresses in the scb are 64 bit long.
513 *
514 * This works as long as the data lies in one page. If blocks ever exceed one
515 * page, we have to fall back to shadowing.
516 *
517 * As we reuse the sca, the vcpu pointers contained in it are invalid. We must
518 * therefore not enable any facilities that access these pointers (e.g. SIGPIF).
519 *
520 * Returns: - 0 if all blocks were pinned.
521 * - > 0 if control has to be given to guest 2
522 * - -ENOMEM if out of memory
523 */
524 static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
525 {
526 struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
527 struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
528 hpa_t hpa;
529 gpa_t gpa;
530 int rc = 0;
531
532 gpa = scb_o->scaol & ~0xfUL;
533 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
534 gpa |= (u64) scb_o->scaoh << 32;
535 if (gpa) {
536 if (!(gpa & ~0x1fffUL))
537 rc = set_validity_icpt(scb_s, 0x0038U);
538 else if ((gpa & ~0x1fffUL) == kvm_s390_get_prefix(vcpu))
539 rc = set_validity_icpt(scb_s, 0x0011U);
540 else if ((gpa & PAGE_MASK) !=
541 ((gpa + sizeof(struct bsca_block) - 1) & PAGE_MASK))
542 rc = set_validity_icpt(scb_s, 0x003bU);
543 if (!rc) {
544 rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
545 if (rc)
546 rc = set_validity_icpt(scb_s, 0x0034U);
547 }
548 if (rc)
549 goto unpin;
550 scb_s->scaoh = (u32)((u64)hpa >> 32);
551 scb_s->scaol = (u32)(u64)hpa;
552 }
553
554 gpa = scb_o->itdba & ~0xffUL;
555 if (gpa && (scb_s->ecb & ECB_TE)) {
556 if (!(gpa & ~0x1fffU)) {
557 rc = set_validity_icpt(scb_s, 0x0080U);
558 goto unpin;
559 }
560 /* 256 bytes cannot cross page boundaries */
561 rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
562 if (rc) {
563 rc = set_validity_icpt(scb_s, 0x0080U);
564 goto unpin;
565 }
566 scb_s->itdba = hpa;
567 }
568
569 gpa = scb_o->gvrd & ~0x1ffUL;
570 if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
571 if (!(gpa & ~0x1fffUL)) {
572 rc = set_validity_icpt(scb_s, 0x1310U);
573 goto unpin;
574 }
575 /*
576 * The 512 bytes of vector registers cannot cross page boundaries;
577 * if this block gets bigger, we have to shadow it.
578 */
579 rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
580 if (rc) {
581 rc = set_validity_icpt(scb_s, 0x1310U);
582 goto unpin;
583 }
584 scb_s->gvrd = hpa;
585 }
586
587 gpa = scb_o->riccbd & ~0x3fUL;
588 if (gpa && (scb_s->ecb3 & ECB3_RI)) {
589 if (!(gpa & ~0x1fffUL)) {
590 rc = set_validity_icpt(scb_s, 0x0043U);
591 goto unpin;
592 }
593 /* 64 bytes cannot cross page boundaries */
594 rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
595 if (rc) {
596 rc = set_validity_icpt(scb_s, 0x0043U);
597 goto unpin;
598 }
599 /* Validity 0x0044 will be checked by SIE */
600 scb_s->riccbd = hpa;
601 }
602 if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
603 unsigned long sdnxc;
604
605 gpa = scb_o->sdnxo & ~0xfUL;
606 sdnxc = scb_o->sdnxo & 0xfUL;
607 if (!gpa || !(gpa & ~0x1fffUL)) {
608 rc = set_validity_icpt(scb_s, 0x10b0U);
609 goto unpin;
610 }
611 if (sdnxc < 6 || sdnxc > 12) {
612 rc = set_validity_icpt(scb_s, 0x10b1U);
613 goto unpin;
614 }
615 if (gpa & ((1 << sdnxc) - 1)) {
616 rc = set_validity_icpt(scb_s, 0x10b2U);
617 goto unpin;
618 }
619 /* Due to alignment rules (checked above) this cannot
620 * cross page boundaries
621 */
622 rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
623 if (rc) {
624 rc = set_validity_icpt(scb_s, 0x10b0U);
625 goto unpin;
626 }
627 scb_s->sdnxo = hpa | sdnxc;
628 }
629 return 0;
630 unpin:
631 unpin_blocks(vcpu, vsie_page);
632 return rc;
633 }
634
635 /* unpin the scb provided by guest 2, marking it as dirty */
636 static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
637 gpa_t gpa)
638 {
639 hpa_t hpa = (hpa_t) vsie_page->scb_o;
640
641 if (hpa)
642 unpin_guest_page(vcpu->kvm, gpa, hpa);
643 vsie_page->scb_o = NULL;
644 }
645
646 /*
647 * Pin the scb at the gpa provided by guest 2 and make it accessible at vsie_page->scb_o.
648 *
649 * Returns: - 0 if the scb was pinned.
650 * - > 0 if control has to be given to guest 2
651 */
652 static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
653 gpa_t gpa)
654 {
655 hpa_t hpa;
656 int rc;
657
658 rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
659 if (rc) {
660 rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
661 WARN_ON_ONCE(rc);
662 return 1;
663 }
664 vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa;
665 return 0;
666 }
667
668 /*
669 * Inject a fault into guest 2.
670 *
671 * Returns: - > 0 if control has to be given to guest 2
672 *          - < 0 if an error occurred during injection.
673 */
674 static int inject_fault(struct kvm_vcpu *vcpu, __u16 code, __u64 vaddr,
675 bool write_flag)
676 {
677 struct kvm_s390_pgm_info pgm = {
678 .code = code,
679 .trans_exc_code =
680 /* 0-51: virtual address */
681 (vaddr & 0xfffffffffffff000UL) |
682 /* 52-53: store / fetch */
683 (((unsigned int) !write_flag) + 1) << 10,
684 /* 62-63: asce id (always primary == 0) */
685 .exc_access_id = 0, /* always primary */
686 .op_access_id = 0, /* not MVPG */
687 };
688 int rc;
689
690 if (code == PGM_PROTECTION)
691 pgm.trans_exc_code |= 0x4UL;
692
693 rc = kvm_s390_inject_prog_irq(vcpu, &pgm);
694 return rc ? rc : 1;
695 }
696
697 /*
698 * Handle a fault during vsie execution on a gmap shadow.
699 *
700 * Returns: - 0 if the fault was resolved
701 * - > 0 if control has to be given to guest 2
702 * - < 0 if an error occurred
703 */
704 static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
705 {
706 int rc;
707
708 if (current->thread.gmap_int_code == PGM_PROTECTION)
709 /* we can directly forward all protection exceptions */
710 return inject_fault(vcpu, PGM_PROTECTION,
711 current->thread.gmap_addr, 1);
712
713 rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
714 current->thread.gmap_addr);
715 if (rc > 0) {
716 rc = inject_fault(vcpu, rc,
717 current->thread.gmap_addr,
718 current->thread.gmap_write_flag);
719 if (rc >= 0)
720 vsie_page->fault_addr = current->thread.gmap_addr;
721 }
722 return rc;
723 }
724
725 /*
726 * Retry the previous fault that required guest 2 intervention. This avoids
727 * one superfluous SIE re-entry and direct exit.
728 *
729 * Will ignore any errors. The next SIE fault will do proper fault handling.
730 */
731 static void handle_last_fault(struct kvm_vcpu *vcpu,
732 struct vsie_page *vsie_page)
733 {
734 if (vsie_page->fault_addr)
735 kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
736 vsie_page->fault_addr);
737 vsie_page->fault_addr = 0;
738 }
739
740 static inline void clear_vsie_icpt(struct vsie_page *vsie_page)
741 {
742 vsie_page->scb_s.icptcode = 0;
743 }
744
745 /* rewind the psw and clear the vsie icpt, so we can retry execution */
746 static void retry_vsie_icpt(struct vsie_page *vsie_page)
747 {
748 struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
749 int ilen = insn_length(scb_s->ipa >> 8);
750
751 /* take care of EXECUTE instructions */
752 if (scb_s->icptstatus & 1) {
753 ilen = (scb_s->icptstatus >> 4) & 0x6;
754 if (!ilen)
755 ilen = 4;
756 }
757 scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, ilen);
758 clear_vsie_icpt(vsie_page);
759 }
760
761 /*
762 * Try to shadow + enable the guest 2 provided facility list.
763 * Retry instruction execution if enabled for and provided by guest 2.
764 *
765 * Returns: - 0 if handled (retry or guest 2 icpt)
766 * - > 0 if control has to be given to guest 2
767 */
768 static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
769 {
770 struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
771 __u32 fac = vsie_page->scb_o->fac & 0x7ffffff8U;
772
773 if (fac && test_kvm_facility(vcpu->kvm, 7)) {
774 retry_vsie_icpt(vsie_page);
775 if (read_guest_real(vcpu, fac, &vsie_page->fac,
776 sizeof(vsie_page->fac)))
777 return set_validity_icpt(scb_s, 0x1090U);
778 scb_s->fac = (__u32)(__u64) &vsie_page->fac;
779 }
780 return 0;
781 }
782
783 /*
784 * Run the vsie on a shadow scb and a shadow gmap, without any further
785 * sanity checks, handling SIE faults.
786 *
787 * Returns: - 0 everything went fine
788 * - > 0 if control has to be given to guest 2
789 * - < 0 if an error occurred
790 */
791 static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
792 {
793 struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
794 struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
795 int rc;
796
797 handle_last_fault(vcpu, vsie_page);
798
799 if (need_resched())
800 schedule();
801 if (test_cpu_flag(CIF_MCCK_PENDING))
802 s390_handle_mcck();
803
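/*
 * Drop the srcu read lock while guest 3 is running: SIE may run for a long
 * time and holding the read lock would delay memslot updates that wait in
 * synchronize_srcu().
 */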
804 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
805 local_irq_disable();
806 guest_enter_irqoff();
807 local_irq_enable();
808
809 rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
810
811 local_irq_disable();
812 guest_exit_irqoff();
813 local_irq_enable();
814 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
815
816 if (rc == -EINTR) {
817 VCPU_EVENT(vcpu, 3, "%s", "machine check");
818 kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info);
819 return 0;
820 }
821
822 if (rc > 0)
823 rc = 0; /* we could still have an icpt */
824 else if (rc == -EFAULT)
825 return handle_fault(vcpu, vsie_page);
826
827 switch (scb_s->icptcode) {
828 case ICPT_INST:
829 if (scb_s->ipa == 0xb2b0)
830 rc = handle_stfle(vcpu, vsie_page);
831 break;
832 case ICPT_STOP:
833 /* stop not requested by g2 - must have been a kick */
834 if (!(atomic_read(&scb_o->cpuflags) & CPUSTAT_STOP_INT))
835 clear_vsie_icpt(vsie_page);
836 break;
837 case ICPT_VALIDITY:
838 if ((scb_s->ipa & 0xf000) != 0xf000)
839 scb_s->ipa += 0x1000;
840 break;
841 }
842 return rc;
843 }
844
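/*
 * Drop the reference to the shadow gmap (if any) and make sure the prefix
 * gets remapped before the scb is used again.
 */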
845 static void release_gmap_shadow(struct vsie_page *vsie_page)
846 {
847 if (vsie_page->gmap)
848 gmap_put(vsie_page->gmap);
849 WRITE_ONCE(vsie_page->gmap, NULL);
850 prefix_unmapped(vsie_page);
851 }
852
853 static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
854 struct vsie_page *vsie_page)
855 {
856 unsigned long asce;
857 union ctlreg0 cr0;
858 struct gmap *gmap;
859 int edat;
860
861 asce = vcpu->arch.sie_block->gcr[1];
862 cr0.val = vcpu->arch.sie_block->gcr[0];
863 edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
864 edat += edat && test_kvm_facility(vcpu->kvm, 78);
865
866 /*
867 * ASCE or EDAT could have changed since last icpt, or the gmap
868 * we're holding has been unshadowed. If the gmap is still valid,
869 * we can safely reuse it.
870 */
871 if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat))
872 return 0;
873
874 /* release the old shadow - if any, and mark the prefix as unmapped */
875 release_gmap_shadow(vsie_page);
876 gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
877 if (IS_ERR(gmap))
878 return PTR_ERR(gmap);
879 gmap->private = vcpu->kvm;
880 WRITE_ONCE(vsie_page->gmap, gmap);
881 return 0;
882 }
883
884 /*
885 * Register the shadow scb at the VCPU, e.g. for kicking out of vsie.
886 */
887 static void register_shadow_scb(struct kvm_vcpu *vcpu,
888 struct vsie_page *vsie_page)
889 {
890 struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
891
892 WRITE_ONCE(vcpu->arch.vsie_block, &vsie_page->scb_s);
893 /*
894 * External calls have to lead to a kick of the vcpu and
895 * therefore out of the vsie -> simulate the wait state.
896 */
897 atomic_or(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
898 /*
899 * We have to adjust the g3 epoch by the g2 epoch. The epoch will
900 * automatically be adjusted on tod clock changes via kvm_sync_clock.
901 */
902 preempt_disable();
903 scb_s->epoch += vcpu->kvm->arch.epoch;
904
905 if (scb_s->ecd & ECD_MEF) {
906 scb_s->epdx += vcpu->kvm->arch.epdx;
907 if (scb_s->epoch < vcpu->kvm->arch.epoch)
908 scb_s->epdx += 1;
909 }
910
911 preempt_enable();
912 }
913
914 /*
915 * Unregister a shadow scb from a VCPU.
916 */
917 static void unregister_shadow_scb(struct kvm_vcpu *vcpu)
918 {
919 atomic_andnot(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
920 WRITE_ONCE(vcpu->arch.vsie_block, NULL);
921 }
922
923 /*
924 * Run the vsie on a shadowed scb, managing the gmap shadow, handling
925 * prefix pages and faults.
926 *
927 * Returns: - 0 if no errors occurred
928 * - > 0 if control has to be given to guest 2
929 * - -ENOMEM if out of memory
930 */
931 static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
932 {
933 struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
934 int rc = 0;
935
936 while (1) {
937 rc = acquire_gmap_shadow(vcpu, vsie_page);
938 if (!rc)
939 rc = map_prefix(vcpu, vsie_page);
940 if (!rc) {
941 gmap_enable(vsie_page->gmap);
942 update_intervention_requests(vsie_page);
943 rc = do_vsie_run(vcpu, vsie_page);
944 gmap_enable(vcpu->arch.gmap);
945 }
946 atomic_andnot(PROG_BLOCK_SIE, &scb_s->prog20);
947
948 if (rc == -EAGAIN)
949 rc = 0;
950 if (rc || scb_s->icptcode || signal_pending(current) ||
951 kvm_s390_vcpu_has_irq(vcpu, 0))
952 break;
953 }
954
955 if (rc == -EFAULT) {
956 /*
957 * Addressing exceptions are always presented as intercepts.
958 * As addressing exceptions are suppressing and our guest 3 PSW
959 * points at the responsible instruction, we have to
960 * forward the PSW and set the ilc. If we can't read the guest 3
961 * instruction, we can use an arbitrary ilc. Let's always use
962 * ilen = 4 for now, so we can avoid reading in guest 3 virtual
963 * memory. (we could also fake the shadow so the hardware
964 * handles it).
965 */
966 scb_s->icptcode = ICPT_PROGI;
967 scb_s->iprcc = PGM_ADDRESSING;
968 scb_s->pgmilc = 4;
969 scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4);
970 }
971 return rc;
972 }
973
974 /*
975 * Get or create a vsie page for a scb address.
976 *
977 * Returns: - address of a vsie page (cached or new one)
978 * - NULL if the same scb address is already used by another VCPU
979 * - ERR_PTR(-ENOMEM) if out of memory
980 */
981 static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
982 {
983 struct vsie_page *vsie_page;
984 struct page *page;
985 int nr_vcpus;
986
987 rcu_read_lock();
988 page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
989 rcu_read_unlock();
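/*
 * A cached page with a reference count of 1 is only owned by the cache;
 * raising it to 2 claims it for this VCPU. If somebody else is already
 * using it, drop our reference again.
 */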
990 if (page) {
991 if (page_ref_inc_return(page) == 2)
992 return page_to_virt(page);
993 page_ref_dec(page);
994 }
995
996 /*
997 * We want at least #online_vcpus shadows, so every VCPU can execute
998 * the VSIE in parallel.
999 */
1000 nr_vcpus = atomic_read(&kvm->online_vcpus);
1001
1002 mutex_lock(&kvm->arch.vsie.mutex);
1003 if (kvm->arch.vsie.page_count < nr_vcpus) {
1004 page = alloc_page(GFP_KERNEL | __GFP_ZERO | GFP_DMA);
1005 if (!page) {
1006 mutex_unlock(&kvm->arch.vsie.mutex);
1007 return ERR_PTR(-ENOMEM);
1008 }
1009 page_ref_inc(page);
1010 kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page;
1011 kvm->arch.vsie.page_count++;
1012 } else {
1013 /* reuse an existing entry that belongs to nobody */
1014 while (true) {
1015 page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
1016 if (page_ref_inc_return(page) == 2)
1017 break;
1018 page_ref_dec(page);
1019 kvm->arch.vsie.next++;
1020 kvm->arch.vsie.next %= nr_vcpus;
1021 }
1022 radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
1023 }
1024 page->index = addr;
1025 /* double use of the same address */
1026 if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) {
1027 page_ref_dec(page);
1028 mutex_unlock(&kvm->arch.vsie.mutex);
1029 return NULL;
1030 }
1031 mutex_unlock(&kvm->arch.vsie.mutex);
1032
1033 vsie_page = page_to_virt(page);
1034 memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
1035 release_gmap_shadow(vsie_page);
1036 vsie_page->fault_addr = 0;
1037 vsie_page->scb_s.ihcpu = 0xffffU;
1038 return vsie_page;
1039 }
1040
1041 /* put a vsie page acquired via get_vsie_page */
1042 static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page)
1043 {
1044 struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT);
1045
1046 page_ref_dec(page);
1047 }
1048
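/*
 * Intercept handler for the SIE instruction executed by guest 2: pin the
 * scb provided by g2, shadow it together with its satellite blocks and a
 * shadow gmap, run it via SIE and hand anything we cannot handle back to g2.
 */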
1049 int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
1050 {
1051 struct vsie_page *vsie_page;
1052 unsigned long scb_addr;
1053 int rc;
1054
1055 vcpu->stat.instruction_sie++;
1056 if (!test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIEF2))
1057 return -EOPNOTSUPP;
1058 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
1059 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
1060
1061 BUILD_BUG_ON(sizeof(struct vsie_page) != PAGE_SIZE);
1062 scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL);
1063
1064 /* 512 byte alignment */
1065 if (unlikely(scb_addr & 0x1ffUL))
1066 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
1067
1068 if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0))
1069 return 0;
1070
1071 vsie_page = get_vsie_page(vcpu->kvm, scb_addr);
1072 if (IS_ERR(vsie_page))
1073 return PTR_ERR(vsie_page);
1074 else if (!vsie_page)
1075 /* double use of sie control block - simply do nothing */
1076 return 0;
1077
1078 rc = pin_scb(vcpu, vsie_page, scb_addr);
1079 if (rc)
1080 goto out_put;
1081 rc = shadow_scb(vcpu, vsie_page);
1082 if (rc)
1083 goto out_unpin_scb;
1084 rc = pin_blocks(vcpu, vsie_page);
1085 if (rc)
1086 goto out_unshadow;
1087 register_shadow_scb(vcpu, vsie_page);
1088 rc = vsie_run(vcpu, vsie_page);
1089 unregister_shadow_scb(vcpu);
1090 unpin_blocks(vcpu, vsie_page);
1091 out_unshadow:
1092 unshadow_scb(vcpu, vsie_page);
1093 out_unpin_scb:
1094 unpin_scb(vcpu, vsie_page, scb_addr);
1095 out_put:
1096 put_vsie_page(vcpu->kvm, vsie_page);
1097
1098 return rc < 0 ? rc : 0;
1099 }
1100
1101 /* Init the vsie data structures. To be called when a vm is initialized. */
1102 void kvm_s390_vsie_init(struct kvm *kvm)
1103 {
1104 mutex_init(&kvm->arch.vsie.mutex);
1105 INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL);
1106 }
1107
1108 /* Destroy the vsie data structures. To be called when a vm is destroyed. */
1109 void kvm_s390_vsie_destroy(struct kvm *kvm)
1110 {
1111 struct vsie_page *vsie_page;
1112 struct page *page;
1113 int i;
1114
1115 mutex_lock(&kvm->arch.vsie.mutex);
1116 for (i = 0; i < kvm->arch.vsie.page_count; i++) {
1117 page = kvm->arch.vsie.pages[i];
1118 kvm->arch.vsie.pages[i] = NULL;
1119 vsie_page = page_to_virt(page);
1120 release_gmap_shadow(vsie_page);
1121 /* free the radix tree entry */
1122 radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
1123 __free_page(page);
1124 }
1125 kvm->arch.vsie.page_count = 0;
1126 mutex_unlock(&kvm->arch.vsie.mutex);
1127 }
1128
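/*
 * Kick a VCPU out of the (v)SIE, e.g. so pending requests or interrupts get
 * processed. PROG_BLOCK_SIE prevents reentering SIE until the vsie loop has
 * seen the request and cleared it again.
 */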
1129 void kvm_s390_vsie_kick(struct kvm_vcpu *vcpu)
1130 {
1131 struct kvm_s390_sie_block *scb = READ_ONCE(vcpu->arch.vsie_block);
1132
1133 /*
1134 * Even if the VCPU lets go of the shadow sie block reference, it is
1135 * still valid in the cache. So we can safely kick it.
1136 */
1137 if (scb) {
1138 atomic_or(PROG_BLOCK_SIE, &scb->prog20);
1139 if (scb->prog0c & PROG_IN_SIE)
1140 atomic_or(CPUSTAT_STOP_INT, &scb->cpuflags);
1141 }
1142 }